toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmnU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "invlist_inline.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g, h)  \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  57 #define PL_lex_inpat            (PL_parser->lex_inpat)
  58 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  59 #define PL_lex_op               (PL_parser->lex_op)
  60 #define PL_lex_repl             (PL_parser->lex_repl)
  61 #define PL_lex_starts           (PL_parser->lex_starts)
  62 #define PL_lex_stuff            (PL_parser->lex_stuff)
  63 #define PL_multi_start          (PL_parser->multi_start)
  64 #define PL_multi_open           (PL_parser->multi_open)
  65 #define PL_multi_close          (PL_parser->multi_close)
  66 #define PL_preambled            (PL_parser->preambled)
  67 #define PL_linestr              (PL_parser->linestr)
  68 #define PL_expect               (PL_parser->expect)
  69 #define PL_copline              (PL_parser->copline)
  70 #define PL_bufptr               (PL_parser->bufptr)
  71 #define PL_oldbufptr            (PL_parser->oldbufptr)
  72 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  73 #define PL_linestart            (PL_parser->linestart)
  74 #define PL_bufend               (PL_parser->bufend)
  75 #define PL_last_uni             (PL_parser->last_uni)
  76 #define PL_last_lop             (PL_parser->last_lop)
  77 #define PL_last_lop_op          (PL_parser->last_lop_op)
  78 #define PL_lex_state            (PL_parser->lex_state)
  79 #define PL_rsfp                 (PL_parser->rsfp)
  80 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  81 #define PL_in_my                (PL_parser->in_my)
  82 #define PL_in_my_stash          (PL_parser->in_my_stash)
  83 #define PL_tokenbuf             (PL_parser->tokenbuf)
  84 #define PL_multi_end            (PL_parser->multi_end)
  85 #define PL_error_count          (PL_parser->error_count)
  86
  87 #  define PL_nexttoke           (PL_parser->nexttoke)
  88 #  define PL_nexttype           (PL_parser->nexttype)
  89 #  define PL_nextval            (PL_parser->nextval)
  90
  91
  92 #define SvEVALED(sv) \
  93     (SvTYPE(sv) >= SVt_PVNV \
  94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
  95
  96 static const char* const ident_too_long = "Identifier too long";
  97 static const char* const ident_var_zero_multi_digit = "Numeric variables with more than one digit may not start with '0'";
  98
  99 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 100
 101 #define XENUMMASK  0x3f
 102 #define XFAKEEOF   0x40
 103 #define XFAKEBRACK 0x80
 104
 105 #ifdef USE_UTF8_SCRIPTS
 106 #   define UTF cBOOL(!IN_BYTES)
 107 #else
 108 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 109 #endif
 110
 111 /* The maximum number of characters preceding the unrecognized one to display */
 112 #define UNRECOGNIZED_PRECEDE_COUNT 10
 113
 114 /* In variables named $^X, these are the legal values for X.
 115  * 1999-02-27 mjd-perl-patch@plover.com */
 116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
 117
 118 #define SPACE_OR_TAB(c) isBLANK_A(c)
 119
 120 #define HEXFP_PEEK(s)     \
 121     (((s[0] == '.') && \
 122       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 123      isALPHA_FOLD_EQ(s[0], 'p'))
 124
 125 /* LEX_* are values for PL_lex_state, the state of the lexer.
 126  * They are arranged oddly so that the guard on the switch statement
 127  * can get by with a single comparison (if the compiler is smart enough).
 128  *
 129  * These values refer to the various states within a sublex parse,
 130  * i.e. within a double quotish string
 131  */
 132
 133 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 134
 135 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 136 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 137 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 138 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 139 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 140
 141                                    /* at end of code, eg "$x" followed by:  */
 142 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 143 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 144
 145 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 146                                         string or after \E, $foo, etc       */
 147 #define LEX_INTERPCONST          2 /* NOT USED */
 148 #define LEX_FORMLINE             1 /* expecting a format line               */
 149
 150 /* returned to yyl_try() to request it to retry the parse loop, expected to only
 151    be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
 152    can also return it.
 153
 154    yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
 155    other token values are 258 or higher (see perly.h), so -1 should be
 156    a safe value here.
 157 */
 158 #define YYL_RETRY (-1)
 159
 160 #ifdef DEBUGGING
 161 static const char* const lex_state_names[] = {
 162     "KNOWNEXT",
 163     "FORMLINE",
 164     "INTERPCONST",
 165     "INTERPCONCAT",
 166     "INTERPENDMAYBE",
 167     "INTERPEND",
 168     "INTERPSTART",
 169     "INTERPPUSH",
 170     "INTERPCASEMOD",
 171     "INTERPNORMAL",
 172     "NORMAL"
 173 };
 174 #endif
 175
 176 #include "keywords.h"
 177
 178 /* CLINE is a macro that ensures PL_copline has a sane value */
 179
 180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 181
 182 /*
 183  * Convenience functions to return different tokens and prime the
 184  * lexer for the next token.  They all take an argument.
 185  *
 186  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 187  * OPERATOR     : generic operator
 188  * AOPERATOR    : assignment operator
 189  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 190  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 191  * PREREF       : *EXPR where EXPR is not a simple identifier
 192  * TERM         : expression term
 193  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 194  * LOOPX        : loop exiting command (goto, last, dump, etc)
 195  * FTST         : file test operator
 196  * FUN0         : zero-argument function
 197  * FUN0OP       : zero-argument function, with its op created in this file
 198  * FUN1         : not used, except for not, which isn't a UNIOP
 199  * BOop         : bitwise or or xor
 200  * BAop         : bitwise and
 201  * BCop         : bitwise complement
 202  * SHop         : shift operator
 203  * PWop         : power operator
 204  * PMop         : pattern-matching operator
 205  * Aop          : addition-level operator
 206  * AopNOASSIGN  : addition-level operator that is never part of .=
 207  * Mop          : multiplication-level operator
 208  * ChEop        : chaining equality-testing operator
 209  * NCEop        : non-chaining comparison operator at equality precedence
 210  * ChRop        : chaining relational operator <= != gt
 211  * NCRop        : non-chaining relational operator isa
 212  *
 213  * Also see LOP and lop() below.
 214  */
 215
 216 #ifdef DEBUGGING /* Serve -DT. */
 217 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 218 #else
 219 #   define REPORT(retval) (retval)
 220 #endif
 221
 222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
 231                          pl_yylval.ival=f, \
 232                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 233                          REPORT((int)LOOPEX))
 234 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 235 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 236 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 237 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 238 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 239 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 241                        REPORT('~')
 242 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 243 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 244 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 245 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 247 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
 249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
 250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
 251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
 252
 253 /* This bit of chicanery makes a unary function followed by
 254  * a parenthesis into a function with one argument, highest precedence.
 255  * The UNIDOR macro is for unary functions that can be followed by the //
 256  * operator (such as C<shift // 0>).
 257  */
 258 #define UNI3(f,x,have_x) { \
 259         pl_yylval.ival = f; \
 260         if (have_x) PL_expect = x; \
 261         PL_bufptr = s; \
 262         PL_last_uni = PL_oldbufptr; \
 263         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
 264         if (*s == '(') \
 265             return REPORT( (int)FUNC1 ); \
 266         s = skipspace(s); \
 267         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 268         }
 269 #define UNI(f)    UNI3(f,XTERM,1)
 270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 271 #define UNIPROTO(f,optional) { \
 272         if (optional) PL_last_uni = PL_oldbufptr; \
 273         OPERATOR(f); \
 274         }
 275
 276 #define UNIBRACK(f) UNI3(f,0,0)
 277
 278 /* grandfather return to old style */
 279 #define OLDLOP(f) \
 280         do { \
 281             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 282                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 283             pl_yylval.ival = (f); \
 284             PL_expect = XTERM; \
 285             PL_bufptr = s; \
 286             return (int)LSTOP; \
 287         } while(0)
 288
 289 #define COPLINE_INC_WITH_HERELINES                  \
 290     STMT_START {                                     \
 291         CopLINE_inc(PL_curcop);                       \
 292         if (PL_parser->herelines)                      \
 293             CopLINE(PL_curcop) += PL_parser->herelines, \
 294             PL_parser->herelines = 0;                    \
 295     } STMT_END
 296 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 297  * is no sublex_push to follow. */
 298 #define COPLINE_SET_FROM_MULTI_END            \
 299     STMT_START {                               \
 300         CopLINE_set(PL_curcop, PL_multi_end);   \
 301         if (PL_multi_end != PL_multi_start)      \
 302             PL_parser->herelines = 0;             \
 303     } STMT_END
 304
 305
 306 /* A file-local structure for passing around information about subroutines and
 307  * related definable words */
 308 struct code {
 309     SV *sv;
 310     CV *cv;
 311     GV *gv, **gvp;
 312     OP *rv2cv_op;
 313     PADOFFSET off;
 314     bool lex;
 315 };
 316
 317 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
 318
 319 #ifdef DEBUGGING
 320
 321 /* how to interpret the pl_yylval associated with the token */
 322 enum token_type {
 323     TOKENTYPE_NONE,
 324     TOKENTYPE_IVAL,
 325     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 326     TOKENTYPE_PVAL,
 327     TOKENTYPE_OPVAL
 328 };
 329
 330 static struct debug_tokens {
 331     const int token;
 332     enum token_type type;
 333     const char *name;
 334 } const debug_tokens[] =
 335 {
 336     { ADDOP,            TOKENTYPE_OPNUM,        "ADDOP" },
 337     { ANDAND,           TOKENTYPE_NONE,         "ANDAND" },
 338     { ANDOP,            TOKENTYPE_NONE,         "ANDOP" },
 339     { ANONSUB,          TOKENTYPE_IVAL,         "ANONSUB" },
 340     { ANON_SIGSUB,      TOKENTYPE_IVAL,         "ANON_SIGSUB" },
 341     { ARROW,            TOKENTYPE_NONE,         "ARROW" },
 342     { ASSIGNOP,         TOKENTYPE_OPNUM,        "ASSIGNOP" },
 343     { BITANDOP,         TOKENTYPE_OPNUM,        "BITANDOP" },
 344     { BITOROP,          TOKENTYPE_OPNUM,        "BITOROP" },
 345     { CHEQOP,           TOKENTYPE_OPNUM,        "CHEQOP" },
 346     { CHRELOP,          TOKENTYPE_OPNUM,        "CHRELOP" },
 347     { COLONATTR,        TOKENTYPE_NONE,         "COLONATTR" },
 348     { CONTINUE,         TOKENTYPE_NONE,         "CONTINUE" },
 349     { DEFAULT,          TOKENTYPE_NONE,         "DEFAULT" },
 350     { DO,               TOKENTYPE_NONE,         "DO" },
 351     { DOLSHARP,         TOKENTYPE_NONE,         "DOLSHARP" },
 352     { DORDOR,           TOKENTYPE_NONE,         "DORDOR" },
 353     { DOROP,            TOKENTYPE_OPNUM,        "DOROP" },
 354     { DOTDOT,           TOKENTYPE_IVAL,         "DOTDOT" },
 355     { ELSE,             TOKENTYPE_NONE,         "ELSE" },
 356     { ELSIF,            TOKENTYPE_IVAL,         "ELSIF" },
 357     { FOR,              TOKENTYPE_IVAL,         "FOR" },
 358     { FORMAT,           TOKENTYPE_NONE,         "FORMAT" },
 359     { FORMLBRACK,       TOKENTYPE_NONE,         "FORMLBRACK" },
 360     { FORMRBRACK,       TOKENTYPE_NONE,         "FORMRBRACK" },
 361     { FUNC,             TOKENTYPE_OPNUM,        "FUNC" },
 362     { FUNC0,            TOKENTYPE_OPNUM,        "FUNC0" },
 363     { FUNC0OP,          TOKENTYPE_OPVAL,        "FUNC0OP" },
 364     { FUNC0SUB,         TOKENTYPE_OPVAL,        "FUNC0SUB" },
 365     { FUNC1,            TOKENTYPE_OPNUM,        "FUNC1" },
 366     { FUNCMETH,         TOKENTYPE_OPVAL,        "FUNCMETH" },
 367     { GIVEN,            TOKENTYPE_IVAL,         "GIVEN" },
 368     { HASHBRACK,        TOKENTYPE_NONE,         "HASHBRACK" },
 369     { IF,               TOKENTYPE_IVAL,         "IF" },
 370     { LABEL,            TOKENTYPE_OPVAL,        "LABEL" },
 371     { LOCAL,            TOKENTYPE_IVAL,         "LOCAL" },
 372     { LOOPEX,           TOKENTYPE_OPNUM,        "LOOPEX" },
 373     { LSTOP,            TOKENTYPE_OPNUM,        "LSTOP" },
 374     { LSTOPSUB,         TOKENTYPE_OPVAL,        "LSTOPSUB" },
 375     { MATCHOP,          TOKENTYPE_OPNUM,        "MATCHOP" },
 376     { METHOD,           TOKENTYPE_OPVAL,        "METHOD" },
 377     { MULOP,            TOKENTYPE_OPNUM,        "MULOP" },
 378     { MY,               TOKENTYPE_IVAL,         "MY" },
 379     { NCEQOP,           TOKENTYPE_OPNUM,        "NCEQOP" },
 380     { NCRELOP,          TOKENTYPE_OPNUM,        "NCRELOP" },
 381     { NOAMP,            TOKENTYPE_NONE,         "NOAMP" },
 382     { NOTOP,            TOKENTYPE_NONE,         "NOTOP" },
 383     { OROP,             TOKENTYPE_IVAL,         "OROP" },
 384     { OROR,             TOKENTYPE_NONE,         "OROR" },
 385     { PACKAGE,          TOKENTYPE_NONE,         "PACKAGE" },
 386     { PLUGEXPR,         TOKENTYPE_OPVAL,        "PLUGEXPR" },
 387     { PLUGSTMT,         TOKENTYPE_OPVAL,        "PLUGSTMT" },
 388     { PMFUNC,           TOKENTYPE_OPVAL,        "PMFUNC" },
 389     { POSTJOIN,         TOKENTYPE_NONE,         "POSTJOIN" },
 390     { POSTDEC,          TOKENTYPE_NONE,         "POSTDEC" },
 391     { POSTINC,          TOKENTYPE_NONE,         "POSTINC" },
 392     { POWOP,            TOKENTYPE_OPNUM,        "POWOP" },
 393     { PREDEC,           TOKENTYPE_NONE,         "PREDEC" },
 394     { PREINC,           TOKENTYPE_NONE,         "PREINC" },
 395     { PRIVATEREF,       TOKENTYPE_OPVAL,        "PRIVATEREF" },
 396     { QWLIST,           TOKENTYPE_OPVAL,        "QWLIST" },
 397     { REFGEN,           TOKENTYPE_NONE,         "REFGEN" },
 398     { REQUIRE,          TOKENTYPE_NONE,         "REQUIRE" },
 399     { SHIFTOP,          TOKENTYPE_OPNUM,        "SHIFTOP" },
 400     { SIGSUB,           TOKENTYPE_NONE,         "SIGSUB" },
 401     { SUB,              TOKENTYPE_NONE,         "SUB" },
 402     { SUBLEXEND,        TOKENTYPE_NONE,         "SUBLEXEND" },
 403     { SUBLEXSTART,      TOKENTYPE_NONE,         "SUBLEXSTART" },
 404     { THING,            TOKENTYPE_OPVAL,        "THING" },
 405     { UMINUS,           TOKENTYPE_NONE,         "UMINUS" },
 406     { UNIOP,            TOKENTYPE_OPNUM,        "UNIOP" },
 407     { UNIOPSUB,         TOKENTYPE_OPVAL,        "UNIOPSUB" },
 408     { UNLESS,           TOKENTYPE_IVAL,         "UNLESS" },
 409     { UNTIL,            TOKENTYPE_IVAL,         "UNTIL" },
 410     { USE,              TOKENTYPE_IVAL,         "USE" },
 411     { WHEN,             TOKENTYPE_IVAL,         "WHEN" },
 412     { WHILE,            TOKENTYPE_IVAL,         "WHILE" },
 413     { BAREWORD,         TOKENTYPE_OPVAL,        "BAREWORD" },
 414     { YADAYADA,         TOKENTYPE_IVAL,         "YADAYADA" },
 415     { 0,                TOKENTYPE_NONE,         NULL }
 416 };
 417
 418 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 419
 420 STATIC int
 421 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 422 {
 423     PERL_ARGS_ASSERT_TOKEREPORT;
 424
 425     if (DEBUG_T_TEST) {
 426         const char *name = NULL;
 427         enum token_type type = TOKENTYPE_NONE;
 428         const struct debug_tokens *p;
 429         SV* const report = newSVpvs("<== ");
 430
 431         for (p = debug_tokens; p->token; p++) {
 432             if (p->token == (int)rv) {
 433                 name = p->name;
 434                 type = p->type;
 435                 break;
 436             }
 437         }
 438         if (name)
 439             Perl_sv_catpv(aTHX_ report, name);
 440         else if (isGRAPH(rv))
 441         {
 442             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 443             if ((char)rv == 'p')
 444                 sv_catpvs(report, " (pending identifier)");
 445         }
 446         else if (!rv)
 447             sv_catpvs(report, "EOF");
 448         else
 449             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 450         switch (type) {
 451         case TOKENTYPE_NONE:
 452             break;
 453         case TOKENTYPE_IVAL:
 454             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 455             break;
 456         case TOKENTYPE_OPNUM:
 457             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 458                                     PL_op_name[lvalp->ival]);
 459             break;
 460         case TOKENTYPE_PVAL:
 461             Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
 462             break;
 463         case TOKENTYPE_OPVAL:
 464             if (lvalp->opval) {
 465                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 466                                     PL_op_name[lvalp->opval->op_type]);
 467                 if (lvalp->opval->op_type == OP_CONST) {
 468                     Perl_sv_catpvf(aTHX_ report, " %s",
 469                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 470                 }
 471
 472             }
 473             else
 474                 sv_catpvs(report, "(opval=null)");
 475             break;
 476         }
 477         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 478     };
 479     return (int)rv;
 480 }
 481
 482
 483 /* print the buffer with suitable escapes */
 484
 485 STATIC void
 486 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 487 {
 488     SV* const tmp = newSVpvs("");
 489
 490     PERL_ARGS_ASSERT_PRINTBUF;
 491
 492     GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
 493     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 494     GCC_DIAG_RESTORE_STMT;
 495     SvREFCNT_dec(tmp);
 496 }
 497
 498 #endif
 499
 500 /*
 501  * S_ao
 502  *
 503  * This subroutine looks for an '=' next to the operator that has just been
 504  * parsed and turns it into an ASSIGNOP if it finds one.
 505  */
 506
 507 STATIC int
 508 S_ao(pTHX_ int toketype)
 509 {
 510     if (*PL_bufptr == '=') {
 511         PL_bufptr++;
 512         if (toketype == ANDAND)
 513             pl_yylval.ival = OP_ANDASSIGN;
 514         else if (toketype == OROR)
 515             pl_yylval.ival = OP_ORASSIGN;
 516         else if (toketype == DORDOR)
 517             pl_yylval.ival = OP_DORASSIGN;
 518         toketype = ASSIGNOP;
 519     }
 520     return REPORT(toketype);
 521 }
 522
 523 /*
 524  * S_no_op
 525  * When Perl expects an operator and finds something else, no_op
 526  * prints the warning.  It always prints "<something> found where
 527  * operator expected.  It prints "Missing semicolon on previous line?"
 528  * if the surprise occurs at the start of the line.  "do you need to
 529  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 530  * where the compiler doesn't know if foo is a method call or a function.
 531  * It prints "Missing operator before end of line" if there's nothing
 532  * after the missing operator, or "... before <...>" if there is something
 533  * after the missing operator.
 534  *
 535  * PL_bufptr is expected to point to the start of the thing that was found,
 536  * and s after the next token or partial token.
 537  */
 538
 539 STATIC void
 540 S_no_op(pTHX_ const char *const what, char *s)
 541 {
 542     char * const oldbp = PL_bufptr;
 543     const bool is_first = (PL_oldbufptr == PL_linestart);
 544
 545     PERL_ARGS_ASSERT_NO_OP;
 546
 547     if (!s)
 548         s = oldbp;
 549     else
 550         PL_bufptr = s;
 551     yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
 552     if (ckWARN_d(WARN_SYNTAX)) {
 553         if (is_first)
 554             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 555                     "\t(Missing semicolon on previous line?)\n");
 556         else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
 557                                                            PL_bufend,
 558                                                            UTF))
 559         {
 560             const char *t;
 561             for (t = PL_oldoldbufptr;
 562                  (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
 563                  t += UTF ? UTF8SKIP(t) : 1)
 564             {
 565                 NOOP;
 566             }
 567             if (t < PL_bufptr && isSPACE(*t))
 568                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 569                         "\t(Do you need to predeclare %" UTF8f "?)\n",
 570                       UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
 571         }
 572         else {
 573             assert(s >= oldbp);
 574             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
 575                     "\t(Missing operator before %" UTF8f "?)\n",
 576                      UTF8fARG(UTF, s - oldbp, oldbp));
 577         }
 578     }
 579     PL_bufptr = oldbp;
 580 }
 581
 582 /*
 583  * S_missingterm
 584  * Complain about missing quote/regexp/heredoc terminator.
 585  * If it's called with NULL then it cauterizes the line buffer.
 586  * If we're in a delimited string and the delimiter is a control
 587  * character, it's reformatted into a two-char sequence like ^C.
 588  * This is fatal.
 589  */
 590
 591 STATIC void
 592 S_missingterm(pTHX_ char *s, STRLEN len)
 593 {
 594     char tmpbuf[UTF8_MAXBYTES + 1];
 595     char q;
 596     bool uni = FALSE;
 597     SV *sv;
 598     if (s) {
 599         char * const nl = (char *) my_memrchr(s, '\n', len);
 600         if (nl) {
 601             *nl = '\0';
 602             len = nl - s;
 603         }
 604         uni = UTF;
 605     }
 606     else if (PL_multi_close < 32) {
 607         *tmpbuf = '^';
 608         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 609         tmpbuf[2] = '\0';
 610         s = tmpbuf;
 611         len = 2;
 612     }
 613     else {
 614         if (LIKELY(PL_multi_close < 256)) {
 615             *tmpbuf = (char)PL_multi_close;
 616             tmpbuf[1] = '\0';
 617             len = 1;
 618         }
 619         else {
 620             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
 621             *end = '\0';
 622             len = end - tmpbuf;
 623             uni = TRUE;
 624         }
 625         s = tmpbuf;
 626     }
 627     q = memchr(s, '"', len) ? '\'' : '"';
 628     sv = sv_2mortal(newSVpvn(s, len));
 629     if (uni)
 630         SvUTF8_on(sv);
 631     Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
 632                      " anywhere before EOF", q, SVfARG(sv), q);
 633 }
 634
 635 #include "feature.h"
 636
 637 /*
 638  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 639  * utf16-to-utf8-reversed.
 640  */
 641
 642 #ifdef PERL_CR_FILTER
 643 static void
 644 strip_return(SV *sv)
 645 {
 646     const char *s = SvPVX_const(sv);
 647     const char * const e = s + SvCUR(sv);
 648
 649     PERL_ARGS_ASSERT_STRIP_RETURN;
 650
 651     /* outer loop optimized to do nothing if there are no CR-LFs */
 652     while (s < e) {
 653         if (*s++ == '\r' && *s == '\n') {
 654             /* hit a CR-LF, need to copy the rest */
 655             char *d = s - 1;
 656             *d++ = *s++;
 657             while (s < e) {
 658                 if (*s == '\r' && s[1] == '\n')
 659                     s++;
 660                 *d++ = *s++;
 661             }
 662             SvCUR(sv) -= s - d;
 663             return;
 664         }
 665     }
 666 }
 667
 668 STATIC I32
 669 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 670 {
 671     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 672     if (count > 0 && !maxlen)
 673         strip_return(sv);
 674     return count;
 675 }
 676 #endif
 677
 678 /*
 679 =for apidoc lex_start
 680
 681 Creates and initialises a new lexer/parser state object, supplying
 682 a context in which to lex and parse from a new source of Perl code.
 683 A pointer to the new state object is placed in L</PL_parser>.  An entry
 684 is made on the save stack so that upon unwinding, the new state object
 685 will be destroyed and the former value of L</PL_parser> will be restored.
 686 Nothing else need be done to clean up the parsing context.
 687
 688 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 689 non-null, provides a string (in SV form) containing code to be parsed.
 690 A copy of the string is made, so subsequent modification of C<line>
 691 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 692 from which code will be read to be parsed.  If both are non-null, the
 693 code in C<line> comes first and must consist of complete lines of input,
 694 and C<rsfp> supplies the remainder of the source.
 695
 696 The C<flags> parameter is reserved for future use.  Currently it is only
 697 used by perl internally, so extensions should always pass zero.
 698
 699 =cut
 700 */
 701
 702 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 703    can share filters with the current parser.
 704    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 705    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 706    destruction. This is used to handle the case of defaulting to reading the
 707    script from the standard input because no filename was given on the command
 708    line (without getting confused by situation where STDIN has been closed, so
 709    the script handle is opened on fd 0)  */
 710
 711 void
 712 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 713 {
 714     const char *s = NULL;
 715     yy_parser *parser, *oparser;
 716
 717     if (flags && flags & ~LEX_START_FLAGS)
 718         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 719
 720     /* create and initialise a parser */
 721
 722     Newxz(parser, 1, yy_parser);
 723     parser->old_parser = oparser = PL_parser;
 724     PL_parser = parser;
 725
 726     parser->stack = NULL;
 727     parser->stack_max1 = NULL;
 728     parser->ps = NULL;
 729
 730     /* on scope exit, free this parser and restore any outer one */
 731     SAVEPARSER(parser);
 732     parser->saved_curcop = PL_curcop;
 733
 734     /* initialise lexer state */
 735
 736     parser->nexttoke = 0;
 737     parser->error_count = oparser ? oparser->error_count : 0;
 738     parser->copline = parser->preambling = NOLINE;
 739     parser->lex_state = LEX_NORMAL;
 740     parser->expect = XSTATE;
 741     parser->rsfp = rsfp;
 742     parser->recheck_utf8_validity = TRUE;
 743     parser->rsfp_filters =
 744       !(flags & LEX_START_SAME_FILTER) || !oparser
 745         ? NULL
 746         : MUTABLE_AV(SvREFCNT_inc(
 747             oparser->rsfp_filters
 748              ? oparser->rsfp_filters
 749              : (oparser->rsfp_filters = newAV())
 750           ));
 751
 752     Newx(parser->lex_brackstack, 120, char);
 753     Newx(parser->lex_casestack, 12, char);
 754     *parser->lex_casestack = '\0';
 755     Newxz(parser->lex_shared, 1, LEXSHARED);
 756
 757     if (line) {
 758         STRLEN len;
 759         const U8* first_bad_char_loc;
 760
 761         s = SvPV_const(line, len);
 762
 763         if (   SvUTF8(line)
 764             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
 765                                              SvCUR(line),
 766                                              &first_bad_char_loc)))
 767         {
 768             _force_out_malformed_utf8_message(first_bad_char_loc,
 769                                               (U8 *) s + SvCUR(line),
 770                                               0,
 771                                               1 /* 1 means die */ );
 772             NOT_REACHED; /* NOTREACHED */
 773         }
 774
 775         parser->linestr = flags & LEX_START_COPIED
 776                             ? SvREFCNT_inc_simple_NN(line)
 777                             : newSVpvn_flags(s, len, SvUTF8(line));
 778         if (!rsfp)
 779             sv_catpvs(parser->linestr, "\n;");
 780     } else {
 781         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 782     }
 783
 784     parser->oldoldbufptr =
 785         parser->oldbufptr =
 786         parser->bufptr =
 787         parser->linestart = SvPVX(parser->linestr);
 788     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 789     parser->last_lop = parser->last_uni = NULL;
 790
 791     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 792                                                         |LEX_DONT_CLOSE_RSFP));
 793     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 794                                                         |LEX_DONT_CLOSE_RSFP));
 795
 796     parser->in_pod = parser->filtered = 0;
 797 }
 798
 799
 800 /* delete a parser object */
 801
 802 void
 803 Perl_parser_free(pTHX_  const yy_parser *parser)
 804 {
 805     PERL_ARGS_ASSERT_PARSER_FREE;
 806
 807     PL_curcop = parser->saved_curcop;
 808     SvREFCNT_dec(parser->linestr);
 809
 810     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 811         PerlIO_clearerr(parser->rsfp);
 812     else if (parser->rsfp && (!parser->old_parser
 813           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 814         PerlIO_close(parser->rsfp);
 815     SvREFCNT_dec(parser->rsfp_filters);
 816     SvREFCNT_dec(parser->lex_stuff);
 817     SvREFCNT_dec(parser->lex_sub_repl);
 818
 819     Safefree(parser->lex_brackstack);
 820     Safefree(parser->lex_casestack);
 821     Safefree(parser->lex_shared);
 822     PL_parser = parser->old_parser;
 823     Safefree(parser);
 824 }
 825
 826 void
 827 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 828 {
 829     I32 nexttoke = parser->nexttoke;
 830     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 831     while (nexttoke--) {
 832         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 833          && parser->nextval[nexttoke].opval
 834          && parser->nextval[nexttoke].opval->op_slabbed
 835          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 836             op_free(parser->nextval[nexttoke].opval);
 837             parser->nextval[nexttoke].opval = NULL;
 838         }
 839     }
 840 }
 841
 842
 843 /*
 844 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
 845
 846 Buffer scalar containing the chunk currently under consideration of the
 847 text currently being lexed.  This is always a plain string scalar (for
 848 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 849 normal scalar means; instead refer to the buffer directly by the pointer
 850 variables described below.
 851
 852 The lexer maintains various C<char*> pointers to things in the
 853 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
 854 reallocated, all of these pointers must be updated.  Don't attempt to
 855 do this manually, but rather use L</lex_grow_linestr> if you need to
 856 reallocate the buffer.
 857
 858 The content of the text chunk in the buffer is commonly exactly one
 859 complete line of input, up to and including a newline terminator,
 860 but there are situations where it is otherwise.  The octets of the
 861 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
 862 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
 863 flag on this scalar, which may disagree with it.
 864
 865 For direct examination of the buffer, the variable
 866 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
 867 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
 868 of these pointers is usually preferable to examination of the scalar
 869 through normal scalar means.
 870
 871 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
 872
 873 Direct pointer to the end of the chunk of text currently being lexed, the
 874 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
 875 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
 876 always located at the end of the buffer, and does not count as part of
 877 the buffer's contents.
 878
 879 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
 880
 881 Points to the current position of lexing inside the lexer buffer.
 882 Characters around this point may be freely examined, within
 883 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
 884 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
 885 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
 886
 887 Lexing code (whether in the Perl core or not) moves this pointer past
 888 the characters that it consumes.  It is also expected to perform some
 889 bookkeeping whenever a newline character is consumed.  This movement
 890 can be more conveniently performed by the function L</lex_read_to>,
 891 which handles newlines appropriately.
 892
 893 Interpretation of the buffer's octets can be abstracted out by
 894 using the slightly higher-level functions L</lex_peek_unichar> and
 895 L</lex_read_unichar>.
 896
 897 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
 898
 899 Points to the start of the current line inside the lexer buffer.
 900 This is useful for indicating at which column an error occurred, and
 901 not much else.  This must be updated by any lexing code that consumes
 902 a newline; the function L</lex_read_to> handles this detail.
 903
 904 =cut
 905 */
 906
 907 /*
 908 =for apidoc lex_bufutf8
 909
 910 Indicates whether the octets in the lexer buffer
 911 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
 912 of Unicode characters.  If not, they should be interpreted as Latin-1
 913 characters.  This is analogous to the C<SvUTF8> flag for scalars.
 914
 915 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
 916 contains valid UTF-8.  Lexing code must be robust in the face of invalid
 917 encoding.
 918
 919 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
 920 is significant, but not the whole story regarding the input character
 921 encoding.  Normally, when a file is being read, the scalar contains octets
 922 and its C<SvUTF8> flag is off, but the octets should be interpreted as
 923 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
 924 however, the scalar may have the C<SvUTF8> flag on, and in this case its
 925 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
 926 is in effect.  This logic may change in the future; use this function
 927 instead of implementing the logic yourself.
 928
 929 =cut
 930 */
 931
 932 bool
 933 Perl_lex_bufutf8(pTHX)
 934 {
 935     return UTF;
 936 }
 937
 938 /*
 939 =for apidoc lex_grow_linestr
 940
 941 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
 942 at least C<len> octets (including terminating C<NUL>).  Returns a
 943 pointer to the reallocated buffer.  This is necessary before making
 944 any direct modification of the buffer that would increase its length.
 945 L</lex_stuff_pvn> provides a more convenient way to insert text into
 946 the buffer.
 947
 948 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
 949 this function updates all of the lexer's variables that point directly
 950 into the buffer.
 951
 952 =cut
 953 */
 954
 955 char *
 956 Perl_lex_grow_linestr(pTHX_ STRLEN len)
 957 {
 958     SV *linestr;
 959     char *buf;
 960     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
 961     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
 962     bool current;
 963
 964     linestr = PL_parser->linestr;
 965     buf = SvPVX(linestr);
 966     if (len <= SvLEN(linestr))
 967         return buf;
 968
 969     /* Is the lex_shared linestr SV the same as the current linestr SV?
 970      * Only in this case does re_eval_start need adjusting, since it
 971      * points within lex_shared->ls_linestr's buffer */
 972     current = (   !PL_parser->lex_shared->ls_linestr
 973                || linestr == PL_parser->lex_shared->ls_linestr);
 974
 975     bufend_pos = PL_parser->bufend - buf;
 976     bufptr_pos = PL_parser->bufptr - buf;
 977     oldbufptr_pos = PL_parser->oldbufptr - buf;
 978     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
 979     linestart_pos = PL_parser->linestart - buf;
 980     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
 981     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
 982     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
 983                             PL_parser->lex_shared->re_eval_start - buf : 0;
 984
 985     buf = sv_grow(linestr, len);
 986
 987     PL_parser->bufend = buf + bufend_pos;
 988     PL_parser->bufptr = buf + bufptr_pos;
 989     PL_parser->oldbufptr = buf + oldbufptr_pos;
 990     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
 991     PL_parser->linestart = buf + linestart_pos;
 992     if (PL_parser->last_uni)
 993         PL_parser->last_uni = buf + last_uni_pos;
 994     if (PL_parser->last_lop)
 995         PL_parser->last_lop = buf + last_lop_pos;
 996     if (current && PL_parser->lex_shared->re_eval_start)
 997         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
 998     return buf;
 999 }
1000
1001 /*
1002 =for apidoc lex_stuff_pvn
1003
1004 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1005 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1006 reallocating the buffer if necessary.  This means that lexing code that
1007 runs later will see the characters as if they had appeared in the input.
1008 It is not recommended to do this as part of normal parsing, and most
1009 uses of this facility run the risk of the inserted characters being
1010 interpreted in an unintended manner.
1011
1012 The string to be inserted is represented by C<len> octets starting
1013 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1014 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1015 The characters are recoded for the lexer buffer, according to how the
1016 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1017 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1018 function is more convenient.
1019
1020 =for apidoc Amnh||LEX_STUFF_UTF8
1021
1022 =cut
1023 */
1024
1025 void
1026 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1027 {
1028     dVAR;
1029     char *bufptr;
1030     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1031     if (flags & ~(LEX_STUFF_UTF8))
1032         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1033     if (UTF) {
1034         if (flags & LEX_STUFF_UTF8) {
1035             goto plain_copy;
1036         } else {
1037             STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1038                                                        (U8 *) pv + len);
1039             const char *p, *e = pv+len;;
1040             if (!highhalf)
1041                 goto plain_copy;
1042             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1043             bufptr = PL_parser->bufptr;
1044             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1045             SvCUR_set(PL_parser->linestr,
1046                 SvCUR(PL_parser->linestr) + len+highhalf);
1047             PL_parser->bufend += len+highhalf;
1048             for (p = pv; p != e; p++) {
1049                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1050             }
1051         }
1052     } else {
1053         if (flags & LEX_STUFF_UTF8) {
1054             STRLEN highhalf = 0;
1055             const char *p, *e = pv+len;
1056             for (p = pv; p != e; p++) {
1057                 U8 c = (U8)*p;
1058                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1059                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1060                                 "non-Latin-1 character into Latin-1 input");
1061                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1062                     p++;
1063                     highhalf++;
1064                 } else assert(UTF8_IS_INVARIANT(c));
1065             }
1066             if (!highhalf)
1067                 goto plain_copy;
1068             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1069             bufptr = PL_parser->bufptr;
1070             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1071             SvCUR_set(PL_parser->linestr,
1072                 SvCUR(PL_parser->linestr) + len-highhalf);
1073             PL_parser->bufend += len-highhalf;
1074             p = pv;
1075             while (p < e) {
1076                 if (UTF8_IS_INVARIANT(*p)) {
1077                     *bufptr++ = *p;
1078                     p++;
1079                 }
1080                 else {
1081                     assert(p < e -1 );
1082                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1083                     p += 2;
1084                 }
1085             }
1086         } else {
1087           plain_copy:
1088             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1089             bufptr = PL_parser->bufptr;
1090             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1091             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1092             PL_parser->bufend += len;
1093             Copy(pv, bufptr, len, char);
1094         }
1095     }
1096 }
1097
1098 /*
1099 =for apidoc lex_stuff_pv
1100
1101 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1102 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1103 reallocating the buffer if necessary.  This means that lexing code that
1104 runs later will see the characters as if they had appeared in the input.
1105 It is not recommended to do this as part of normal parsing, and most
1106 uses of this facility run the risk of the inserted characters being
1107 interpreted in an unintended manner.
1108
1109 The string to be inserted is represented by octets starting at C<pv>
1110 and continuing to the first nul.  These octets are interpreted as either
1111 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1112 in C<flags>.  The characters are recoded for the lexer buffer, according
1113 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1114 If it is not convenient to nul-terminate a string to be inserted, the
1115 L</lex_stuff_pvn> function is more appropriate.
1116
1117 =cut
1118 */
1119
1120 void
1121 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1122 {
1123     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1124     lex_stuff_pvn(pv, strlen(pv), flags);
1125 }
1126
1127 /*
1128 =for apidoc lex_stuff_sv
1129
1130 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1131 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1132 reallocating the buffer if necessary.  This means that lexing code that
1133 runs later will see the characters as if they had appeared in the input.
1134 It is not recommended to do this as part of normal parsing, and most
1135 uses of this facility run the risk of the inserted characters being
1136 interpreted in an unintended manner.
1137
1138 The string to be inserted is the string value of C<sv>.  The characters
1139 are recoded for the lexer buffer, according to how the buffer is currently
1140 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1141 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1142 need to construct a scalar.
1143
1144 =cut
1145 */
1146
1147 void
1148 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1149 {
1150     char *pv;
1151     STRLEN len;
1152     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1153     if (flags)
1154         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1155     pv = SvPV(sv, len);
1156     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1157 }
1158
1159 /*
1160 =for apidoc lex_unstuff
1161
1162 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1163 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1164 This hides the discarded text from any lexing code that runs later,
1165 as if the text had never appeared.
1166
1167 This is not the normal way to consume lexed text.  For that, use
1168 L</lex_read_to>.
1169
1170 =cut
1171 */
1172
1173 void
1174 Perl_lex_unstuff(pTHX_ char *ptr)
1175 {
1176     char *buf, *bufend;
1177     STRLEN unstuff_len;
1178     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1179     buf = PL_parser->bufptr;
1180     if (ptr < buf)
1181         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1182     if (ptr == buf)
1183         return;
1184     bufend = PL_parser->bufend;
1185     if (ptr > bufend)
1186         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1187     unstuff_len = ptr - buf;
1188     Move(ptr, buf, bufend+1-ptr, char);
1189     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1190     PL_parser->bufend = bufend - unstuff_len;
1191 }
1192
1193 /*
1194 =for apidoc lex_read_to
1195
1196 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1197 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1198 performing the correct bookkeeping whenever a newline character is passed.
1199 This is the normal way to consume lexed text.
1200
1201 Interpretation of the buffer's octets can be abstracted out by
1202 using the slightly higher-level functions L</lex_peek_unichar> and
1203 L</lex_read_unichar>.
1204
1205 =cut
1206 */
1207
1208 void
1209 Perl_lex_read_to(pTHX_ char *ptr)
1210 {
1211     char *s;
1212     PERL_ARGS_ASSERT_LEX_READ_TO;
1213     s = PL_parser->bufptr;
1214     if (ptr < s || ptr > PL_parser->bufend)
1215         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1216     for (; s != ptr; s++)
1217         if (*s == '\n') {
1218             COPLINE_INC_WITH_HERELINES;
1219             PL_parser->linestart = s+1;
1220         }
1221     PL_parser->bufptr = ptr;
1222 }
1223
1224 /*
1225 =for apidoc lex_discard_to
1226
1227 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1228 up to C<ptr>.  The remaining content of the buffer will be moved, and
1229 all pointers into the buffer updated appropriately.  C<ptr> must not
1230 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1231 it is not permitted to discard text that has yet to be lexed.
1232
1233 Normally it is not necessarily to do this directly, because it suffices to
1234 use the implicit discarding behaviour of L</lex_next_chunk> and things
1235 based on it.  However, if a token stretches across multiple lines,
1236 and the lexing code has kept multiple lines of text in the buffer for
1237 that purpose, then after completion of the token it would be wise to
1238 explicitly discard the now-unneeded earlier lines, to avoid future
1239 multi-line tokens growing the buffer without bound.
1240
1241 =cut
1242 */
1243
1244 void
1245 Perl_lex_discard_to(pTHX_ char *ptr)
1246 {
1247     char *buf;
1248     STRLEN discard_len;
1249     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1250     buf = SvPVX(PL_parser->linestr);
1251     if (ptr < buf)
1252         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1253     if (ptr == buf)
1254         return;
1255     if (ptr > PL_parser->bufptr)
1256         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1257     discard_len = ptr - buf;
1258     if (PL_parser->oldbufptr < ptr)
1259         PL_parser->oldbufptr = ptr;
1260     if (PL_parser->oldoldbufptr < ptr)
1261         PL_parser->oldoldbufptr = ptr;
1262     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1263         PL_parser->last_uni = NULL;
1264     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1265         PL_parser->last_lop = NULL;
1266     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1267     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1268     PL_parser->bufend -= discard_len;
1269     PL_parser->bufptr -= discard_len;
1270     PL_parser->oldbufptr -= discard_len;
1271     PL_parser->oldoldbufptr -= discard_len;
1272     if (PL_parser->last_uni)
1273         PL_parser->last_uni -= discard_len;
1274     if (PL_parser->last_lop)
1275         PL_parser->last_lop -= discard_len;
1276 }
1277
1278 void
1279 Perl_notify_parser_that_changed_to_utf8(pTHX)
1280 {
1281     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1282      * off to on.  At compile time, this has the effect of entering a 'use
1283      * utf8' section.  This means that any input was not previously checked for
1284      * UTF-8 (because it was off), but now we do need to check it, or our
1285      * assumptions about the input being sane could be wrong, and we could
1286      * segfault.  This routine just sets a flag so that the next time we look
1287      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1288      * proper phase, there may not be a parser object, but if there is, setting
1289      * the flag is harmless */
1290
1291     if (PL_parser) {
1292         PL_parser->recheck_utf8_validity = TRUE;
1293     }
1294 }
1295
1296 /*
1297 =for apidoc lex_next_chunk
1298
1299 Reads in the next chunk of text to be lexed, appending it to
1300 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1301 looked to the end of the current chunk and wants to know more.  It is
1302 usual, but not necessary, for lexing to have consumed the entirety of
1303 the current chunk at this time.
1304
1305 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1306 chunk (i.e., the current chunk has been entirely consumed), normally the
1307 current chunk will be discarded at the same time that the new chunk is
1308 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1309 will not be discarded.  If the current chunk has not been entirely
1310 consumed, then it will not be discarded regardless of the flag.
1311
1312 Returns true if some new text was added to the buffer, or false if the
1313 buffer has reached the end of the input text.
1314
1315 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1316
1317 =cut
1318 */
1319
1320 #define LEX_FAKE_EOF 0x80000000
1321 #define LEX_NO_TERM  0x40000000 /* here-doc */
1322
1323 bool
1324 Perl_lex_next_chunk(pTHX_ U32 flags)
1325 {
1326     SV *linestr;
1327     char *buf;
1328     STRLEN old_bufend_pos, new_bufend_pos;
1329     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1330     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1331     bool got_some_for_debugger = 0;
1332     bool got_some;
1333
1334     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1335         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1336     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1337         return FALSE;
1338     linestr = PL_parser->linestr;
1339     buf = SvPVX(linestr);
1340     if (!(flags & LEX_KEEP_PREVIOUS)
1341           && PL_parser->bufptr == PL_parser->bufend)
1342     {
1343         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1344         linestart_pos = 0;
1345         if (PL_parser->last_uni != PL_parser->bufend)
1346             PL_parser->last_uni = NULL;
1347         if (PL_parser->last_lop != PL_parser->bufend)
1348             PL_parser->last_lop = NULL;
1349         last_uni_pos = last_lop_pos = 0;
1350         *buf = 0;
1351         SvCUR_set(linestr, 0);
1352     } else {
1353         old_bufend_pos = PL_parser->bufend - buf;
1354         bufptr_pos = PL_parser->bufptr - buf;
1355         oldbufptr_pos = PL_parser->oldbufptr - buf;
1356         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1357         linestart_pos = PL_parser->linestart - buf;
1358         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1359         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1360     }
1361     if (flags & LEX_FAKE_EOF) {
1362         goto eof;
1363     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1364         got_some = 0;
1365     } else if (filter_gets(linestr, old_bufend_pos)) {
1366         got_some = 1;
1367         got_some_for_debugger = 1;
1368     } else if (flags & LEX_NO_TERM) {
1369         got_some = 0;
1370     } else {
1371         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1372             SvPVCLEAR(linestr);
1373         eof:
1374         /* End of real input.  Close filehandle (unless it was STDIN),
1375          * then add implicit termination.
1376          */
1377         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1378             PerlIO_clearerr(PL_parser->rsfp);
1379         else if (PL_parser->rsfp)
1380             (void)PerlIO_close(PL_parser->rsfp);
1381         PL_parser->rsfp = NULL;
1382         PL_parser->in_pod = PL_parser->filtered = 0;
1383         if (!PL_in_eval && PL_minus_p) {
1384             sv_catpvs(linestr,
1385                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1386             PL_minus_n = PL_minus_p = 0;
1387         } else if (!PL_in_eval && PL_minus_n) {
1388             sv_catpvs(linestr, /*{*/";}");
1389             PL_minus_n = 0;
1390         } else
1391             sv_catpvs(linestr, ";");
1392         got_some = 1;
1393     }
1394     buf = SvPVX(linestr);
1395     new_bufend_pos = SvCUR(linestr);
1396     PL_parser->bufend = buf + new_bufend_pos;
1397     PL_parser->bufptr = buf + bufptr_pos;
1398
1399     if (UTF) {
1400         const U8* first_bad_char_loc;
1401         if (UNLIKELY(! is_utf8_string_loc(
1402                             (U8 *) PL_parser->bufptr,
1403                                    PL_parser->bufend - PL_parser->bufptr,
1404                                    &first_bad_char_loc)))
1405         {
1406             _force_out_malformed_utf8_message(first_bad_char_loc,
1407                                               (U8 *) PL_parser->bufend,
1408                                               0,
1409                                               1 /* 1 means die */ );
1410             NOT_REACHED; /* NOTREACHED */
1411         }
1412     }
1413
1414     PL_parser->oldbufptr = buf + oldbufptr_pos;
1415     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1416     PL_parser->linestart = buf + linestart_pos;
1417     if (PL_parser->last_uni)
1418         PL_parser->last_uni = buf + last_uni_pos;
1419     if (PL_parser->last_lop)
1420         PL_parser->last_lop = buf + last_lop_pos;
1421     if (PL_parser->preambling != NOLINE) {
1422         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1423         PL_parser->preambling = NOLINE;
1424     }
1425     if (   got_some_for_debugger
1426         && PERLDB_LINE_OR_SAVESRC
1427         && PL_curstash != PL_debstash)
1428     {
1429         /* debugger active and we're not compiling the debugger code,
1430          * so store the line into the debugger's array of lines
1431          */
1432         update_debugger_info(NULL, buf+old_bufend_pos,
1433             new_bufend_pos-old_bufend_pos);
1434     }
1435     return got_some;
1436 }
1437
1438 /*
1439 =for apidoc lex_peek_unichar
1440
1441 Looks ahead one (Unicode) character in the text currently being lexed.
1442 Returns the codepoint (unsigned integer value) of the next character,
1443 or -1 if lexing has reached the end of the input text.  To consume the
1444 peeked character, use L</lex_read_unichar>.
1445
1446 If the next character is in (or extends into) the next chunk of input
1447 text, the next chunk will be read in.  Normally the current chunk will be
1448 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1449 bit set, then the current chunk will not be discarded.
1450
1451 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1452 is encountered, an exception is generated.
1453
1454 =cut
1455 */
1456
1457 I32
1458 Perl_lex_peek_unichar(pTHX_ U32 flags)
1459 {
1460     dVAR;
1461     char *s, *bufend;
1462     if (flags & ~(LEX_KEEP_PREVIOUS))
1463         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1464     s = PL_parser->bufptr;
1465     bufend = PL_parser->bufend;
1466     if (UTF) {
1467         U8 head;
1468         I32 unichar;
1469         STRLEN len, retlen;
1470         if (s == bufend) {
1471             if (!lex_next_chunk(flags))
1472                 return -1;
1473             s = PL_parser->bufptr;
1474             bufend = PL_parser->bufend;
1475         }
1476         head = (U8)*s;
1477         if (UTF8_IS_INVARIANT(head))
1478             return head;
1479         if (UTF8_IS_START(head)) {
1480             len = UTF8SKIP(&head);
1481             while ((STRLEN)(bufend-s) < len) {
1482                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1483                     break;
1484                 s = PL_parser->bufptr;
1485                 bufend = PL_parser->bufend;
1486             }
1487         }
1488         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1489         if (retlen == (STRLEN)-1) {
1490             _force_out_malformed_utf8_message((U8 *) s,
1491                                               (U8 *) bufend,
1492                                               0,
1493                                               1 /* 1 means die */ );
1494             NOT_REACHED; /* NOTREACHED */
1495         }
1496         return unichar;
1497     } else {
1498         if (s == bufend) {
1499             if (!lex_next_chunk(flags))
1500                 return -1;
1501             s = PL_parser->bufptr;
1502         }
1503         return (U8)*s;
1504     }
1505 }
1506
1507 /*
1508 =for apidoc lex_read_unichar
1509
1510 Reads the next (Unicode) character in the text currently being lexed.
1511 Returns the codepoint (unsigned integer value) of the character read,
1512 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1513 if lexing has reached the end of the input text.  To non-destructively
1514 examine the next character, use L</lex_peek_unichar> instead.
1515
1516 If the next character is in (or extends into) the next chunk of input
1517 text, the next chunk will be read in.  Normally the current chunk will be
1518 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1519 bit set, then the current chunk will not be discarded.
1520
1521 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1522 is encountered, an exception is generated.
1523
1524 =cut
1525 */
1526
1527 I32
1528 Perl_lex_read_unichar(pTHX_ U32 flags)
1529 {
1530     I32 c;
1531     if (flags & ~(LEX_KEEP_PREVIOUS))
1532         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1533     c = lex_peek_unichar(flags);
1534     if (c != -1) {
1535         if (c == '\n')
1536             COPLINE_INC_WITH_HERELINES;
1537         if (UTF)
1538             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1539         else
1540             ++(PL_parser->bufptr);
1541     }
1542     return c;
1543 }
1544
1545 /*
1546 =for apidoc lex_read_space
1547
1548 Reads optional spaces, in Perl style, in the text currently being
1549 lexed.  The spaces may include ordinary whitespace characters and
1550 Perl-style comments.  C<#line> directives are processed if encountered.
1551 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1552 at a non-space character (or the end of the input text).
1553
1554 If spaces extend into the next chunk of input text, the next chunk will
1555 be read in.  Normally the current chunk will be discarded at the same
1556 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1557 chunk will not be discarded.
1558
1559 =cut
1560 */
1561
1562 #define LEX_NO_INCLINE    0x40000000
1563 #define LEX_NO_NEXT_CHUNK 0x80000000
1564
1565 void
1566 Perl_lex_read_space(pTHX_ U32 flags)
1567 {
1568     char *s, *bufend;
1569     const bool can_incline = !(flags & LEX_NO_INCLINE);
1570     bool need_incline = 0;
1571     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1572         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1573     s = PL_parser->bufptr;
1574     bufend = PL_parser->bufend;
1575     while (1) {
1576         char c = *s;
1577         if (c == '#') {
1578             do {
1579                 c = *++s;
1580             } while (!(c == '\n' || (c == 0 && s == bufend)));
1581         } else if (c == '\n') {
1582             s++;
1583             if (can_incline) {
1584                 PL_parser->linestart = s;
1585                 if (s == bufend)
1586                     need_incline = 1;
1587                 else
1588                     incline(s, bufend);
1589             }
1590         } else if (isSPACE(c)) {
1591             s++;
1592         } else if (c == 0 && s == bufend) {
1593             bool got_more;
1594             line_t l;
1595             if (flags & LEX_NO_NEXT_CHUNK)
1596                 break;
1597             PL_parser->bufptr = s;
1598             l = CopLINE(PL_curcop);
1599             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1600             got_more = lex_next_chunk(flags);
1601             CopLINE_set(PL_curcop, l);
1602             s = PL_parser->bufptr;
1603             bufend = PL_parser->bufend;
1604             if (!got_more)
1605                 break;
1606             if (can_incline && need_incline && PL_parser->rsfp) {
1607                 incline(s, bufend);
1608                 need_incline = 0;
1609             }
1610         } else if (!c) {
1611             s++;
1612         } else {
1613             break;
1614         }
1615     }
1616     PL_parser->bufptr = s;
1617 }
1618
1619 /*
1620
1621 =for apidoc validate_proto
1622
1623 This function performs syntax checking on a prototype, C<proto>.
1624 If C<warn> is true, any illegal characters or mismatched brackets
1625 will trigger illegalproto warnings, declaring that they were
1626 detected in the prototype for C<name>.
1627
1628 The return value is C<true> if this is a valid prototype, and
1629 C<false> if it is not, regardless of whether C<warn> was C<true> or
1630 C<false>.
1631
1632 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1633
1634 =cut
1635
1636  */
1637
1638 bool
1639 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1640 {
1641     STRLEN len, origlen;
1642     char *p;
1643     bool bad_proto = FALSE;
1644     bool in_brackets = FALSE;
1645     bool after_slash = FALSE;
1646     char greedy_proto = ' ';
1647     bool proto_after_greedy_proto = FALSE;
1648     bool must_be_last = FALSE;
1649     bool underscore = FALSE;
1650     bool bad_proto_after_underscore = FALSE;
1651
1652     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1653
1654     if (!proto)
1655         return TRUE;
1656
1657     p = SvPV(proto, len);
1658     origlen = len;
1659     for (; len--; p++) {
1660         if (!isSPACE(*p)) {
1661             if (must_be_last)
1662                 proto_after_greedy_proto = TRUE;
1663             if (underscore) {
1664                 if (!memCHRs(";@%", *p))
1665                     bad_proto_after_underscore = TRUE;
1666                 underscore = FALSE;
1667             }
1668             if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1669                 bad_proto = TRUE;
1670             }
1671             else {
1672                 if (*p == '[')
1673                     in_brackets = TRUE;
1674                 else if (*p == ']')
1675                     in_brackets = FALSE;
1676                 else if ((*p == '@' || *p == '%')
1677                          && !after_slash
1678                          && !in_brackets )
1679                 {
1680                     must_be_last = TRUE;
1681                     greedy_proto = *p;
1682                 }
1683                 else if (*p == '_')
1684                     underscore = TRUE;
1685             }
1686             if (*p == '\\')
1687                 after_slash = TRUE;
1688             else
1689                 after_slash = FALSE;
1690         }
1691     }
1692
1693     if (warn) {
1694         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1695         p -= origlen;
1696         p = SvUTF8(proto)
1697             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1698                              origlen, UNI_DISPLAY_ISPRINT)
1699             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1700
1701         if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1702             SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1703             sv_catpvs(name2, "::");
1704             sv_catsv(name2, (SV *)name);
1705             name = name2;
1706         }
1707
1708         if (proto_after_greedy_proto)
1709             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1710                         "Prototype after '%c' for %" SVf " : %s",
1711                         greedy_proto, SVfARG(name), p);
1712         if (in_brackets)
1713             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1714                         "Missing ']' in prototype for %" SVf " : %s",
1715                         SVfARG(name), p);
1716         if (bad_proto)
1717             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1718                         "Illegal character in prototype for %" SVf " : %s",
1719                         SVfARG(name), p);
1720         if (bad_proto_after_underscore)
1721             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1722                         "Illegal character after '_' in prototype for %" SVf " : %s",
1723                         SVfARG(name), p);
1724     }
1725
1726     return (! (proto_after_greedy_proto || bad_proto) );
1727 }
1728
1729 /*
1730  * S_incline
1731  * This subroutine has nothing to do with tilting, whether at windmills
1732  * or pinball tables.  Its name is short for "increment line".  It
1733  * increments the current line number in CopLINE(PL_curcop) and checks
1734  * to see whether the line starts with a comment of the form
1735  *    # line 500 "foo.pm"
1736  * If so, it sets the current line number and file to the values in the comment.
1737  */
1738
1739 STATIC void
1740 S_incline(pTHX_ const char *s, const char *end)
1741 {
1742     const char *t;
1743     const char *n;
1744     const char *e;
1745     line_t line_num;
1746     UV uv;
1747
1748     PERL_ARGS_ASSERT_INCLINE;
1749
1750     assert(end >= s);
1751
1752     COPLINE_INC_WITH_HERELINES;
1753     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1754      && s+1 == PL_bufend && *s == ';') {
1755         /* fake newline in string eval */
1756         CopLINE_dec(PL_curcop);
1757         return;
1758     }
1759     if (*s++ != '#')
1760         return;
1761     while (SPACE_OR_TAB(*s))
1762         s++;
1763     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1764         s += sizeof("line") - 1;
1765     else
1766         return;
1767     if (SPACE_OR_TAB(*s))
1768         s++;
1769     else
1770         return;
1771     while (SPACE_OR_TAB(*s))
1772         s++;
1773     if (!isDIGIT(*s))
1774         return;
1775
1776     n = s;
1777     while (isDIGIT(*s))
1778         s++;
1779     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1780         return;
1781     while (SPACE_OR_TAB(*s))
1782         s++;
1783     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1784         s++;
1785         e = t + 1;
1786     }
1787     else {
1788         t = s;
1789         while (*t && !isSPACE(*t))
1790             t++;
1791         e = t;
1792     }
1793     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1794         e++;
1795     if (*e != '\n' && *e != '\0')
1796         return;         /* false alarm */
1797
1798     if (!grok_atoUV(n, &uv, &e))
1799         return;
1800     line_num = ((line_t)uv) - 1;
1801
1802     if (t - s > 0) {
1803         const STRLEN len = t - s;
1804
1805         if (!PL_rsfp && !PL_parser->filtered) {
1806             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1807              * to *{"::_<newfilename"} */
1808             /* However, the long form of evals is only turned on by the
1809                debugger - usually they're "(eval %lu)" */
1810             GV * const cfgv = CopFILEGV(PL_curcop);
1811             if (cfgv) {
1812                 char smallbuf[128];
1813                 STRLEN tmplen2 = len;
1814                 char *tmpbuf2;
1815                 GV *gv2;
1816
1817                 if (tmplen2 + 2 <= sizeof smallbuf)
1818                     tmpbuf2 = smallbuf;
1819                 else
1820                     Newx(tmpbuf2, tmplen2 + 2, char);
1821
1822                 tmpbuf2[0] = '_';
1823                 tmpbuf2[1] = '<';
1824
1825                 memcpy(tmpbuf2 + 2, s, tmplen2);
1826                 tmplen2 += 2;
1827
1828                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1829                 if (!isGV(gv2)) {
1830                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1831                     /* adjust ${"::_<newfilename"} to store the new file name */
1832                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1833                     /* The line number may differ. If that is the case,
1834                        alias the saved lines that are in the array.
1835                        Otherwise alias the whole array. */
1836                     if (CopLINE(PL_curcop) == line_num) {
1837                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1838                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1839                     }
1840                     else if (GvAV(cfgv)) {
1841                         AV * const av = GvAV(cfgv);
1842                         const line_t start = CopLINE(PL_curcop)+1;
1843                         SSize_t items = AvFILLp(av) - start;
1844                         if (items > 0) {
1845                             AV * const av2 = GvAVn(gv2);
1846                             SV **svp = AvARRAY(av) + start;
1847                             Size_t l = line_num+1;
1848                             while (items-- && l < SSize_t_MAX && l == (line_t)l)
1849                                 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1850                         }
1851                     }
1852                 }
1853
1854                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1855             }
1856         }
1857         CopFILE_free(PL_curcop);
1858         CopFILE_setn(PL_curcop, s, len);
1859     }
1860     CopLINE_set(PL_curcop, line_num);
1861 }
1862
1863 STATIC void
1864 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1865 {
1866     AV *av = CopFILEAVx(PL_curcop);
1867     if (av) {
1868         SV * sv;
1869         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1870         else {
1871             sv = *av_fetch(av, 0, 1);
1872             SvUPGRADE(sv, SVt_PVMG);
1873         }
1874         if (!SvPOK(sv)) SvPVCLEAR(sv);
1875         if (orig_sv)
1876             sv_catsv(sv, orig_sv);
1877         else
1878             sv_catpvn(sv, buf, len);
1879         if (!SvIOK(sv)) {
1880             (void)SvIOK_on(sv);
1881             SvIV_set(sv, 0);
1882         }
1883         if (PL_parser->preambling == NOLINE)
1884             av_store(av, CopLINE(PL_curcop), sv);
1885     }
1886 }
1887
1888 /*
1889  * skipspace
1890  * Called to gobble the appropriate amount and type of whitespace.
1891  * Skips comments as well.
1892  * Returns the next character after the whitespace that is skipped.
1893  *
1894  * peekspace
1895  * Same thing, but look ahead without incrementing line numbers or
1896  * adjusting PL_linestart.
1897  */
1898
1899 #define skipspace(s) skipspace_flags(s, 0)
1900 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1901
1902 char *
1903 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
1904 {
1905     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1906     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1907         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1908             s++;
1909     } else {
1910         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1911         PL_bufptr = s;
1912         lex_read_space(flags | LEX_KEEP_PREVIOUS |
1913                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1914                     LEX_NO_NEXT_CHUNK : 0));
1915         s = PL_bufptr;
1916         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1917         if (PL_linestart > PL_bufptr)
1918             PL_bufptr = PL_linestart;
1919         return s;
1920     }
1921     return s;
1922 }
1923
1924 /*
1925  * S_check_uni
1926  * Check the unary operators to ensure there's no ambiguity in how they're
1927  * used.  An ambiguous piece of code would be:
1928  *     rand + 5
1929  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
1930  * the +5 is its argument.
1931  */
1932
1933 STATIC void
1934 S_check_uni(pTHX)
1935 {
1936     const char *s;
1937
1938     if (PL_oldoldbufptr != PL_last_uni)
1939         return;
1940     while (isSPACE(*PL_last_uni))
1941         PL_last_uni++;
1942     s = PL_last_uni;
1943     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1944         s += UTF ? UTF8SKIP(s) : 1;
1945     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1946         return;
1947
1948     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1949                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1950                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1951 }
1952
1953 /*
1954  * LOP : macro to build a list operator.  Its behaviour has been replaced
1955  * with a subroutine, S_lop() for which LOP is just another name.
1956  */
1957
1958 #define LOP(f,x) return lop(f,x,s)
1959
1960 /*
1961  * S_lop
1962  * Build a list operator (or something that might be one).  The rules:
1963  *  - if we have a next token, then it's a list operator (no parens) for
1964  *    which the next token has already been parsed; e.g.,
1965  *       sort foo @args
1966  *       sort foo (@args)
1967  *  - if the next thing is an opening paren, then it's a function
1968  *  - else it's a list operator
1969  */
1970
1971 STATIC I32
1972 S_lop(pTHX_ I32 f, U8 x, char *s)
1973 {
1974     PERL_ARGS_ASSERT_LOP;
1975
1976     pl_yylval.ival = f;
1977     CLINE;
1978     PL_bufptr = s;
1979     PL_last_lop = PL_oldbufptr;
1980     PL_last_lop_op = (OPCODE)f;
1981     if (PL_nexttoke)
1982         goto lstop;
1983     PL_expect = x;
1984     if (*s == '(')
1985         return REPORT(FUNC);
1986     s = skipspace(s);
1987     if (*s == '(')
1988         return REPORT(FUNC);
1989     else {
1990         lstop:
1991         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1992             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1993         return REPORT(LSTOP);
1994     }
1995 }
1996
1997 /*
1998  * S_force_next
1999  * When the lexer realizes it knows the next token (for instance,
2000  * it is reordering tokens for the parser) then it can call S_force_next
2001  * to know what token to return the next time the lexer is called.  Caller
2002  * will need to set PL_nextval[] and possibly PL_expect to ensure
2003  * the lexer handles the token correctly.
2004  */
2005
2006 STATIC void
2007 S_force_next(pTHX_ I32 type)
2008 {
2009 #ifdef DEBUGGING
2010     if (DEBUG_T_TEST) {
2011         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2012         tokereport(type, &NEXTVAL_NEXTTOKE);
2013     }
2014 #endif
2015     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2016     PL_nexttype[PL_nexttoke] = type;
2017     PL_nexttoke++;
2018 }
2019
2020 /*
2021  * S_postderef
2022  *
2023  * This subroutine handles postfix deref syntax after the arrow has already
2024  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2025  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2026  * only the first, leaving yylex to find the next.
2027  */
2028
2029 static int
2030 S_postderef(pTHX_ int const funny, char const next)
2031 {
2032     assert(funny == DOLSHARP || memCHRs("$@%&*", funny));
2033     if (next == '*') {
2034         PL_expect = XOPERATOR;
2035         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2036             assert('@' == funny || '$' == funny || DOLSHARP == funny);
2037             PL_lex_state = LEX_INTERPEND;
2038             if ('@' == funny)
2039                 force_next(POSTJOIN);
2040         }
2041         force_next(next);
2042         PL_bufptr+=2;
2043     }
2044     else {
2045         if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2046          && !PL_lex_brackets)
2047             PL_lex_dojoin = 2;
2048         PL_expect = XOPERATOR;
2049         PL_bufptr++;
2050     }
2051     return funny;
2052 }
2053
2054 void
2055 Perl_yyunlex(pTHX)
2056 {
2057     int yyc = PL_parser->yychar;
2058     if (yyc != YYEMPTY) {
2059         if (yyc) {
2060             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2061             if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2062                 PL_lex_allbrackets--;
2063                 PL_lex_brackets--;
2064                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2065             } else if (yyc == '('/*)*/) {
2066                 PL_lex_allbrackets--;
2067                 yyc |= (2<<24);
2068             }
2069             force_next(yyc);
2070         }
2071         PL_parser->yychar = YYEMPTY;
2072     }
2073 }
2074
2075 STATIC SV *
2076 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2077 {
2078     SV * const sv = newSVpvn_utf8(start, len,
2079                     ! IN_BYTES
2080                   &&  UTF
2081                   &&  len != 0
2082                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2083     return sv;
2084 }
2085
2086 /*
2087  * S_force_word
2088  * When the lexer knows the next thing is a word (for instance, it has
2089  * just seen -> and it knows that the next char is a word char, then
2090  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2091  * lookahead.
2092  *
2093  * Arguments:
2094  *   char *start : buffer position (must be within PL_linestr)
2095  *   int token   : PL_next* will be this type of bare word
2096  *                 (e.g., METHOD,BAREWORD)
2097  *   int check_keyword : if true, Perl checks to make sure the word isn't
2098  *       a keyword (do this if the word is a label, e.g. goto FOO)
2099  *   int allow_pack : if true, : characters will also be allowed (require,
2100  *       use, etc. do this)
2101  */
2102
2103 STATIC char *
2104 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2105 {
2106     char *s;
2107     STRLEN len;
2108
2109     PERL_ARGS_ASSERT_FORCE_WORD;
2110
2111     start = skipspace(start);
2112     s = start;
2113     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2114         || (allow_pack && *s == ':' && s[1] == ':') )
2115     {
2116         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2117         if (check_keyword) {
2118           char *s2 = PL_tokenbuf;
2119           STRLEN len2 = len;
2120           if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2121             s2 += sizeof("CORE::") - 1;
2122             len2 -= sizeof("CORE::") - 1;
2123           }
2124           if (keyword(s2, len2, 0))
2125             return start;
2126         }
2127         if (token == METHOD) {
2128             s = skipspace(s);
2129             if (*s == '(')
2130                 PL_expect = XTERM;
2131             else {
2132                 PL_expect = XOPERATOR;
2133             }
2134         }
2135         NEXTVAL_NEXTTOKE.opval
2136             = newSVOP(OP_CONST,0,
2137                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2138         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2139         force_next(token);
2140     }
2141     return s;
2142 }
2143
2144 /*
2145  * S_force_ident
2146  * Called when the lexer wants $foo *foo &foo etc, but the program
2147  * text only contains the "foo" portion.  The first argument is a pointer
2148  * to the "foo", and the second argument is the type symbol to prefix.
2149  * Forces the next token to be a "BAREWORD".
2150  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2151  */
2152
2153 STATIC void
2154 S_force_ident(pTHX_ const char *s, int kind)
2155 {
2156     PERL_ARGS_ASSERT_FORCE_IDENT;
2157
2158     if (s[0]) {
2159         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2160         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2161                                                                 UTF ? SVf_UTF8 : 0));
2162         NEXTVAL_NEXTTOKE.opval = o;
2163         force_next(BAREWORD);
2164         if (kind) {
2165             o->op_private = OPpCONST_ENTERED;
2166             /* XXX see note in pp_entereval() for why we forgo typo
2167                warnings if the symbol must be introduced in an eval.
2168                GSAR 96-10-12 */
2169             gv_fetchpvn_flags(s, len,
2170                               (PL_in_eval ? GV_ADDMULTI
2171                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2172                               kind == '$' ? SVt_PV :
2173                               kind == '@' ? SVt_PVAV :
2174                               kind == '%' ? SVt_PVHV :
2175                               SVt_PVGV
2176                               );
2177         }
2178     }
2179 }
2180
2181 static void
2182 S_force_ident_maybe_lex(pTHX_ char pit)
2183 {
2184     NEXTVAL_NEXTTOKE.ival = pit;
2185     force_next('p');
2186 }
2187
2188 NV
2189 Perl_str_to_version(pTHX_ SV *sv)
2190 {
2191     NV retval = 0.0;
2192     NV nshift = 1.0;
2193     STRLEN len;
2194     const char *start = SvPV_const(sv,len);
2195     const char * const end = start + len;
2196     const bool utf = cBOOL(SvUTF8(sv));
2197
2198     PERL_ARGS_ASSERT_STR_TO_VERSION;
2199
2200     while (start < end) {
2201         STRLEN skip;
2202         UV n;
2203         if (utf)
2204             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2205         else {
2206             n = *(U8*)start;
2207             skip = 1;
2208         }
2209         retval += ((NV)n)/nshift;
2210         start += skip;
2211         nshift *= 1000;
2212     }
2213     return retval;
2214 }
2215
2216 /*
2217  * S_force_version
2218  * Forces the next token to be a version number.
2219  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2220  * and if "guessing" is TRUE, then no new token is created (and the caller
2221  * must use an alternative parsing method).
2222  */
2223
2224 STATIC char *
2225 S_force_version(pTHX_ char *s, int guessing)
2226 {
2227     OP *version = NULL;
2228     char *d;
2229
2230     PERL_ARGS_ASSERT_FORCE_VERSION;
2231
2232     s = skipspace(s);
2233
2234     d = s;
2235     if (*d == 'v')
2236         d++;
2237     if (isDIGIT(*d)) {
2238         while (isDIGIT(*d) || *d == '_' || *d == '.')
2239             d++;
2240         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2241             SV *ver;
2242             s = scan_num(s, &pl_yylval);
2243             version = pl_yylval.opval;
2244             ver = cSVOPx(version)->op_sv;
2245             if (SvPOK(ver) && !SvNIOK(ver)) {
2246                 SvUPGRADE(ver, SVt_PVNV);
2247                 SvNV_set(ver, str_to_version(ver));
2248                 SvNOK_on(ver);          /* hint that it is a version */
2249             }
2250         }
2251         else if (guessing) {
2252             return s;
2253         }
2254     }
2255
2256     /* NOTE: The parser sees the package name and the VERSION swapped */
2257     NEXTVAL_NEXTTOKE.opval = version;
2258     force_next(BAREWORD);
2259
2260     return s;
2261 }
2262
2263 /*
2264  * S_force_strict_version
2265  * Forces the next token to be a version number using strict syntax rules.
2266  */
2267
2268 STATIC char *
2269 S_force_strict_version(pTHX_ char *s)
2270 {
2271     OP *version = NULL;
2272     const char *errstr = NULL;
2273
2274     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2275
2276     while (isSPACE(*s)) /* leading whitespace */
2277         s++;
2278
2279     if (is_STRICT_VERSION(s,&errstr)) {
2280         SV *ver = newSV(0);
2281         s = (char *)scan_version(s, ver, 0);
2282         version = newSVOP(OP_CONST, 0, ver);
2283     }
2284     else if ((*s != ';' && *s != '{' && *s != '}' )
2285              && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2286     {
2287         PL_bufptr = s;
2288         if (errstr)
2289             yyerror(errstr); /* version required */
2290         return s;
2291     }
2292
2293     /* NOTE: The parser sees the package name and the VERSION swapped */
2294     NEXTVAL_NEXTTOKE.opval = version;
2295     force_next(BAREWORD);
2296
2297     return s;
2298 }
2299
2300 /*
2301  * S_tokeq
2302  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2303  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2304  * unchanged, and a new SV containing the modified input is returned.
2305  */
2306
2307 STATIC SV *
2308 S_tokeq(pTHX_ SV *sv)
2309 {
2310     char *s;
2311     char *send;
2312     char *d;
2313     SV *pv = sv;
2314
2315     PERL_ARGS_ASSERT_TOKEQ;
2316
2317     assert (SvPOK(sv));
2318     assert (SvLEN(sv));
2319     assert (!SvIsCOW(sv));
2320     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2321         goto finish;
2322     s = SvPVX(sv);
2323     send = SvEND(sv);
2324     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2325     while (s < send && !(*s == '\\' && s[1] == '\\'))
2326         s++;
2327     if (s == send)
2328         goto finish;
2329     d = s;
2330     if ( PL_hints & HINT_NEW_STRING ) {
2331         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2332                             SVs_TEMP | SvUTF8(sv));
2333     }
2334     while (s < send) {
2335         if (*s == '\\') {
2336             if (s + 1 < send && (s[1] == '\\'))
2337                 s++;            /* all that, just for this */
2338         }
2339         *d++ = *s++;
2340     }
2341     *d = '\0';
2342     SvCUR_set(sv, d - SvPVX_const(sv));
2343   finish:
2344     if ( PL_hints & HINT_NEW_STRING )
2345        return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2346     return sv;
2347 }
2348
2349 /*
2350  * Now come three functions related to double-quote context,
2351  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2352  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2353  * interact with PL_lex_state, and create fake ( ... ) argument lists
2354  * to handle functions and concatenation.
2355  * For example,
2356  *   "foo\lbar"
2357  * is tokenised as
2358  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2359  */
2360
2361 /*
2362  * S_sublex_start
2363  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2364  *
2365  * Pattern matching will set PL_lex_op to the pattern-matching op to
2366  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2367  *
2368  * OP_CONST is easy--just make the new op and return.
2369  *
2370  * Everything else becomes a FUNC.
2371  *
2372  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2373  * had an OP_CONST.  This just sets us up for a
2374  * call to S_sublex_push().
2375  */
2376
2377 STATIC I32
2378 S_sublex_start(pTHX)
2379 {
2380     const I32 op_type = pl_yylval.ival;
2381
2382     if (op_type == OP_NULL) {
2383         pl_yylval.opval = PL_lex_op;
2384         PL_lex_op = NULL;
2385         return THING;
2386     }
2387     if (op_type == OP_CONST) {
2388         SV *sv = PL_lex_stuff;
2389         PL_lex_stuff = NULL;
2390         sv = tokeq(sv);
2391
2392         if (SvTYPE(sv) == SVt_PVIV) {
2393             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2394             STRLEN len;
2395             const char * const p = SvPV_const(sv, len);
2396             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2397             SvREFCNT_dec(sv);
2398             sv = nsv;
2399         }
2400         pl_yylval.opval = newSVOP(op_type, 0, sv);
2401         return THING;
2402     }
2403
2404     PL_parser->lex_super_state = PL_lex_state;
2405     PL_parser->lex_sub_inwhat = (U16)op_type;
2406     PL_parser->lex_sub_op = PL_lex_op;
2407     PL_parser->sub_no_recover = FALSE;
2408     PL_parser->sub_error_count = PL_error_count;
2409     PL_lex_state = LEX_INTERPPUSH;
2410
2411     PL_expect = XTERM;
2412     if (PL_lex_op) {
2413         pl_yylval.opval = PL_lex_op;
2414         PL_lex_op = NULL;
2415         return PMFUNC;
2416     }
2417     else
2418         return FUNC;
2419 }
2420
2421 /*
2422  * S_sublex_push
2423  * Create a new scope to save the lexing state.  The scope will be
2424  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2425  * to the uc, lc, etc. found before.
2426  * Sets PL_lex_state to LEX_INTERPCONCAT.
2427  */
2428
2429 STATIC I32
2430 S_sublex_push(pTHX)
2431 {
2432     LEXSHARED *shared;
2433     const bool is_heredoc = PL_multi_close == '<';
2434     ENTER;
2435
2436     PL_lex_state = PL_parser->lex_super_state;
2437     SAVEI8(PL_lex_dojoin);
2438     SAVEI32(PL_lex_brackets);
2439     SAVEI32(PL_lex_allbrackets);
2440     SAVEI32(PL_lex_formbrack);
2441     SAVEI8(PL_lex_fakeeof);
2442     SAVEI32(PL_lex_casemods);
2443     SAVEI32(PL_lex_starts);
2444     SAVEI8(PL_lex_state);
2445     SAVESPTR(PL_lex_repl);
2446     SAVEVPTR(PL_lex_inpat);
2447     SAVEI16(PL_lex_inwhat);
2448     if (is_heredoc)
2449     {
2450         SAVECOPLINE(PL_curcop);
2451         SAVEI32(PL_multi_end);
2452         SAVEI32(PL_parser->herelines);
2453         PL_parser->herelines = 0;
2454     }
2455     SAVEIV(PL_multi_close);
2456     SAVEPPTR(PL_bufptr);
2457     SAVEPPTR(PL_bufend);
2458     SAVEPPTR(PL_oldbufptr);
2459     SAVEPPTR(PL_oldoldbufptr);
2460     SAVEPPTR(PL_last_lop);
2461     SAVEPPTR(PL_last_uni);
2462     SAVEPPTR(PL_linestart);
2463     SAVESPTR(PL_linestr);
2464     SAVEGENERICPV(PL_lex_brackstack);
2465     SAVEGENERICPV(PL_lex_casestack);
2466     SAVEGENERICPV(PL_parser->lex_shared);
2467     SAVEBOOL(PL_parser->lex_re_reparsing);
2468     SAVEI32(PL_copline);
2469
2470     /* The here-doc parser needs to be able to peek into outer lexing
2471        scopes to find the body of the here-doc.  So we put PL_linestr and
2472        PL_bufptr into lex_shared, to ‘share’ those values.
2473      */
2474     PL_parser->lex_shared->ls_linestr = PL_linestr;
2475     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2476
2477     PL_linestr = PL_lex_stuff;
2478     PL_lex_repl = PL_parser->lex_sub_repl;
2479     PL_lex_stuff = NULL;
2480     PL_parser->lex_sub_repl = NULL;
2481
2482     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2483        set for an inner quote-like operator and then an error causes scope-
2484        popping.  We must not have a PL_lex_stuff value left dangling, as
2485        that breaks assumptions elsewhere.  See bug #123617.  */
2486     SAVEGENERICSV(PL_lex_stuff);
2487     SAVEGENERICSV(PL_parser->lex_sub_repl);
2488
2489     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2490         = SvPVX(PL_linestr);
2491     PL_bufend += SvCUR(PL_linestr);
2492     PL_last_lop = PL_last_uni = NULL;
2493     SAVEFREESV(PL_linestr);
2494     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2495
2496     PL_lex_dojoin = FALSE;
2497     PL_lex_brackets = PL_lex_formbrack = 0;
2498     PL_lex_allbrackets = 0;
2499     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2500     Newx(PL_lex_brackstack, 120, char);
2501     Newx(PL_lex_casestack, 12, char);
2502     PL_lex_casemods = 0;
2503     *PL_lex_casestack = '\0';
2504     PL_lex_starts = 0;
2505     PL_lex_state = LEX_INTERPCONCAT;
2506     if (is_heredoc)
2507         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2508     PL_copline = NOLINE;
2509
2510     Newxz(shared, 1, LEXSHARED);
2511     shared->ls_prev = PL_parser->lex_shared;
2512     PL_parser->lex_shared = shared;
2513
2514     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2515     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2516     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2517         PL_lex_inpat = PL_parser->lex_sub_op;
2518     else
2519         PL_lex_inpat = NULL;
2520
2521     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2522     PL_in_eval &= ~EVAL_RE_REPARSING;
2523
2524     return SUBLEXSTART;
2525 }
2526
2527 /*
2528  * S_sublex_done
2529  * Restores lexer state after a S_sublex_push.
2530  */
2531
2532 STATIC I32
2533 S_sublex_done(pTHX)
2534 {
2535     if (!PL_lex_starts++) {
2536         SV * const sv = newSVpvs("");
2537         if (SvUTF8(PL_linestr))
2538             SvUTF8_on(sv);
2539         PL_expect = XOPERATOR;
2540         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2541         return THING;
2542     }
2543
2544     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2545         PL_lex_state = LEX_INTERPCASEMOD;
2546         return yylex();
2547     }
2548
2549     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2550     assert(PL_lex_inwhat != OP_TRANSR);
2551     if (PL_lex_repl) {
2552         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2553         PL_linestr = PL_lex_repl;
2554         PL_lex_inpat = 0;
2555         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2556         PL_bufend += SvCUR(PL_linestr);
2557         PL_last_lop = PL_last_uni = NULL;
2558         PL_lex_dojoin = FALSE;
2559         PL_lex_brackets = 0;
2560         PL_lex_allbrackets = 0;
2561         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2562         PL_lex_casemods = 0;
2563         *PL_lex_casestack = '\0';
2564         PL_lex_starts = 0;
2565         if (SvEVALED(PL_lex_repl)) {
2566             PL_lex_state = LEX_INTERPNORMAL;
2567             PL_lex_starts++;
2568             /*  we don't clear PL_lex_repl here, so that we can check later
2569                 whether this is an evalled subst; that means we rely on the
2570                 logic to ensure sublex_done() is called again only via the
2571                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2572         }
2573         else {
2574             PL_lex_state = LEX_INTERPCONCAT;
2575             PL_lex_repl = NULL;
2576         }
2577         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2578             CopLINE(PL_curcop) +=
2579                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2580                  + PL_parser->herelines;
2581             PL_parser->herelines = 0;
2582         }
2583         return '/';
2584     }
2585     else {
2586         const line_t l = CopLINE(PL_curcop);
2587         LEAVE;
2588         if (PL_parser->sub_error_count != PL_error_count) {
2589             if (PL_parser->sub_no_recover) {
2590                 yyquit();
2591                 NOT_REACHED;
2592             }
2593         }
2594         if (PL_multi_close == '<')
2595             PL_parser->herelines += l - PL_multi_end;
2596         PL_bufend = SvPVX(PL_linestr);
2597         PL_bufend += SvCUR(PL_linestr);
2598         PL_expect = XOPERATOR;
2599         return SUBLEXEND;
2600     }
2601 }
2602
2603 HV *
2604 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2605                           const STRLEN context_len, const char ** error_msg)
2606 {
2607     /* Load the official _charnames module if not already there.  The
2608      * parameters are just to give info for any error messages generated:
2609      *  char_name   a name to look up which is the reason for loading this
2610      *  context     'char_name' in the context in the input in which it appears
2611      *  context_len how many bytes 'context' occupies
2612      *  error_msg   *error_msg will be set to any error
2613      *
2614      *  Returns the ^H table if success; otherwise NULL */
2615
2616     unsigned int i;
2617     HV * table;
2618     SV **cvp;
2619     SV * res;
2620
2621     PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2622
2623     /* This loop is executed 1 1/2 times.  On the first time through, if it
2624      * isn't already loaded, try loading it, and iterate just once to see if it
2625      * worked.  */
2626     for (i = 0; i < 2; i++) {
2627         table = GvHV(PL_hintgv);                 /* ^H */
2628
2629         if (    table
2630             && (PL_hints & HINT_LOCALIZE_HH)
2631             && (cvp = hv_fetchs(table, "charnames", FALSE))
2632             &&  SvOK(*cvp))
2633         {
2634             return table;   /* Quit if already loaded */
2635         }
2636
2637         if (i == 0) {
2638             Perl_load_module(aTHX_
2639                 0,
2640                 newSVpvs("_charnames"),
2641
2642                 /* version parameter; no need to specify it, as if we get too early
2643                 * a version, will fail anyway, not being able to find 'charnames'
2644                 * */
2645                 NULL,
2646                 newSVpvs(":full"),
2647                 newSVpvs(":short"),
2648                 NULL);
2649         }
2650     }
2651
2652     /* Here, it failed; new_constant will give appropriate error messages */
2653     *error_msg = NULL;
2654     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2655                         context, context_len, error_msg);
2656     SvREFCNT_dec(res);
2657
2658     return NULL;
2659 }
2660
2661 STATIC SV*
2662 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2663 {
2664     /* This justs wraps get_and_check_backslash_N_name() to output any error
2665      * message it returns. */
2666
2667     const char * error_msg = NULL;
2668     SV * result;
2669
2670     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2671
2672     /* charnames doesn't work well if there have been errors found */
2673     if (PL_error_count > 0) {
2674         return NULL;
2675     }
2676
2677     result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2678
2679     if (error_msg) {
2680         yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2681     }
2682
2683     return result;
2684 }
2685
2686 SV*
2687 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2688                                           const char* const e,
2689                                           const bool is_utf8,
2690                                           const char ** error_msg)
2691 {
2692     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2693      * interior, hence to the "}".  Finds what the name resolves to, returning
2694      * an SV* containing it; NULL if no valid one found.
2695      *
2696      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2697      * doesn't have to be. */
2698
2699     SV* char_name;
2700     SV* res;
2701     HV * table;
2702     SV **cvp;
2703     SV *cv;
2704     SV *rv;
2705     HV *stash;
2706
2707     /* Points to the beginning of the \N{... so that any messages include the
2708      * context of what's failing*/
2709     const char* context = s - 3;
2710     STRLEN context_len = e - context + 1; /* include all of \N{...} */
2711
2712     dVAR;
2713
2714     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2715
2716     assert(e >= s);
2717     assert(s > (char *) 3);
2718
2719     char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2720
2721     if (!SvCUR(char_name)) {
2722         SvREFCNT_dec_NN(char_name);
2723         /* diag_listed_as: Unknown charname '%s' */
2724         *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2725         return NULL;
2726     }
2727
2728     /* Autoload the charnames module */
2729
2730     table = load_charnames(char_name, context, context_len, error_msg);
2731     if (table == NULL) {
2732         return NULL;
2733     }
2734
2735     *error_msg = NULL;
2736     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2737                         context, context_len, error_msg);
2738     if (*error_msg) {
2739         *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2740
2741         SvREFCNT_dec(res);
2742         return NULL;
2743     }
2744
2745     /* See if the charnames handler is the Perl core's, and if so, we can skip
2746      * the validation needed for a user-supplied one, as Perl's does its own
2747      * validation. */
2748     cvp = hv_fetchs(table, "charnames", FALSE);
2749     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2750         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2751     {
2752         const char * const name = HvNAME(stash);
2753          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2754            return res;
2755        }
2756     }
2757
2758     /* Here, it isn't Perl's charname handler.  We can't rely on a
2759      * user-supplied handler to validate the input name.  For non-ut8 input,
2760      * look to see that the first character is legal.  Then loop through the
2761      * rest checking that each is a continuation */
2762
2763     /* This code makes the reasonable assumption that the only Latin1-range
2764      * characters that begin a character name alias are alphabetic, otherwise
2765      * would have to create a isCHARNAME_BEGIN macro */
2766
2767     if (! is_utf8) {
2768         if (! isALPHAU(*s)) {
2769             goto bad_charname;
2770         }
2771         s++;
2772         while (s < e) {
2773             if (! isCHARNAME_CONT(*s)) {
2774                 goto bad_charname;
2775             }
2776             if (*s == ' ' && *(s-1) == ' ') {
2777                 goto multi_spaces;
2778             }
2779             s++;
2780         }
2781     }
2782     else {
2783         /* Similarly for utf8.  For invariants can check directly; for other
2784          * Latin1, can calculate their code point and check; otherwise  use an
2785          * inversion list */
2786         if (UTF8_IS_INVARIANT(*s)) {
2787             if (! isALPHAU(*s)) {
2788                 goto bad_charname;
2789             }
2790             s++;
2791         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2792             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2793                 goto bad_charname;
2794             }
2795             s += 2;
2796         }
2797         else {
2798             if (! _invlist_contains_cp(PL_utf8_charname_begin,
2799                                        utf8_to_uvchr_buf((U8 *) s,
2800                                                          (U8 *) e,
2801                                                          NULL)))
2802             {
2803                 goto bad_charname;
2804             }
2805             s += UTF8SKIP(s);
2806         }
2807
2808         while (s < e) {
2809             if (UTF8_IS_INVARIANT(*s)) {
2810                 if (! isCHARNAME_CONT(*s)) {
2811                     goto bad_charname;
2812                 }
2813                 if (*s == ' ' && *(s-1) == ' ') {
2814                     goto multi_spaces;
2815                 }
2816                 s++;
2817             }
2818             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2819                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2820                 {
2821                     goto bad_charname;
2822                 }
2823                 s += 2;
2824             }
2825             else {
2826                 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2827                                            utf8_to_uvchr_buf((U8 *) s,
2828                                                              (U8 *) e,
2829                                                              NULL)))
2830                 {
2831                     goto bad_charname;
2832                 }
2833                 s += UTF8SKIP(s);
2834             }
2835         }
2836     }
2837     if (*(s-1) == ' ') {
2838         /* diag_listed_as: charnames alias definitions may not contain
2839                            trailing white-space; marked by <-- HERE in %s
2840          */
2841         *error_msg = Perl_form(aTHX_
2842             "charnames alias definitions may not contain trailing "
2843             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2844             (int)(s - context + 1), context,
2845             (int)(e - s + 1), s + 1);
2846         return NULL;
2847     }
2848
2849     if (SvUTF8(res)) { /* Don't accept malformed charname value */
2850         const U8* first_bad_char_loc;
2851         STRLEN len;
2852         const char* const str = SvPV_const(res, len);
2853         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2854                                           &first_bad_char_loc)))
2855         {
2856             _force_out_malformed_utf8_message(first_bad_char_loc,
2857                                               (U8 *) PL_parser->bufend,
2858                                               0,
2859                                               0 /* 0 means don't die */ );
2860             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2861                                immediately after '%s' */
2862             *error_msg = Perl_form(aTHX_
2863                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2864                  (int) context_len, context,
2865                  (int) ((char *) first_bad_char_loc - str), str);
2866             return NULL;
2867         }
2868     }
2869
2870     return res;
2871
2872   bad_charname: {
2873
2874         /* The final %.*s makes sure that should the trailing NUL be missing
2875          * that this print won't run off the end of the string */
2876         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2877                            in \N{%s} */
2878         *error_msg = Perl_form(aTHX_
2879             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2880             (int)(s - context + 1), context,
2881             (int)(e - s + 1), s + 1);
2882         return NULL;
2883     }
2884
2885   multi_spaces:
2886         /* diag_listed_as: charnames alias definitions may not contain a
2887                            sequence of multiple spaces; marked by <-- HERE
2888                            in %s */
2889         *error_msg = Perl_form(aTHX_
2890             "charnames alias definitions may not contain a sequence of "
2891             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2892             (int)(s - context + 1), context,
2893             (int)(e - s + 1), s + 1);
2894         return NULL;
2895 }
2896
2897 /*
2898   scan_const
2899
2900   Extracts the next constant part of a pattern, double-quoted string,
2901   or transliteration.  This is terrifying code.
2902
2903   For example, in parsing the double-quoted string "ab\x63$d", it would
2904   stop at the '$' and return an OP_CONST containing 'abc'.
2905
2906   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2907   processing a pattern (PL_lex_inpat is true), a transliteration
2908   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2909
2910   Returns a pointer to the character scanned up to. If this is
2911   advanced from the start pointer supplied (i.e. if anything was
2912   successfully parsed), will leave an OP_CONST for the substring scanned
2913   in pl_yylval. Caller must intuit reason for not parsing further
2914   by looking at the next characters herself.
2915
2916   In patterns:
2917     expand:
2918       \N{FOO}  => \N{U+hex_for_character_FOO}
2919       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2920
2921     pass through:
2922         all other \-char, including \N and \N{ apart from \N{ABC}
2923
2924     stops on:
2925         @ and $ where it appears to be a var, but not for $ as tail anchor
2926         \l \L \u \U \Q \E
2927         (?{  or  (??{
2928
2929   In transliterations:
2930     characters are VERY literal, except for - not at the start or end
2931     of the string, which indicates a range.  However some backslash sequences
2932     are recognized: \r, \n, and the like
2933                     \007 \o{}, \x{}, \N{}
2934     If all elements in the transliteration are below 256,
2935     scan_const expands the range to the full set of intermediate
2936     characters. If the range is in utf8, the hyphen is replaced with
2937     a certain range mark which will be handled by pmtrans() in op.c.
2938
2939   In double-quoted strings:
2940     backslashes:
2941       all those recognized in transliterations
2942       deprecated backrefs: \1 (in substitution replacements)
2943       case and quoting: \U \Q \E
2944     stops on @ and $
2945
2946   scan_const does *not* construct ops to handle interpolated strings.
2947   It stops processing as soon as it finds an embedded $ or @ variable
2948   and leaves it to the caller to work out what's going on.
2949
2950   embedded arrays (whether in pattern or not) could be:
2951       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2952
2953   $ in double-quoted strings must be the symbol of an embedded scalar.
2954
2955   $ in pattern could be $foo or could be tail anchor.  Assumption:
2956   it's a tail anchor if $ is the last thing in the string, or if it's
2957   followed by one of "()| \r\n\t"
2958
2959   \1 (backreferences) are turned into $1 in substitutions
2960
2961   The structure of the code is
2962       while (there's a character to process) {
2963           handle transliteration ranges
2964           skip regexp comments /(?#comment)/ and codes /(?{code})/
2965           skip #-initiated comments in //x patterns
2966           check for embedded arrays
2967           check for embedded scalars
2968           if (backslash) {
2969               deprecate \1 in substitution replacements
2970               handle string-changing backslashes \l \U \Q \E, etc.
2971               switch (what was escaped) {
2972                   handle \- in a transliteration (becomes a literal -)
2973                   if a pattern and not \N{, go treat as regular character
2974                   handle \132 (octal characters)
2975                   handle \x15 and \x{1234} (hex characters)
2976                   handle \N{name} (named characters, also \N{3,5} in a pattern)
2977                   handle \cV (control characters)
2978                   handle printf-style backslashes (\f, \r, \n, etc)
2979               } (end switch)
2980               continue
2981           } (end if backslash)
2982           handle regular character
2983     } (end while character to read)
2984
2985 */
2986
2987 STATIC char *
2988 S_scan_const(pTHX_ char *start)
2989 {
2990     char *send = PL_bufend;             /* end of the constant */
2991     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
2992                                            on sizing. */
2993     char *s = start;                    /* start of the constant */
2994     char *d = SvPVX(sv);                /* destination for copies */
2995     bool dorange = FALSE;               /* are we in a translit range? */
2996     bool didrange = FALSE;              /* did we just finish a range? */
2997     bool in_charclass = FALSE;          /* within /[...]/ */
2998     bool s_is_utf8 = cBOOL(UTF);        /* Is the source string assumed to be
2999                                            UTF8?  But, this can show as true
3000                                            when the source isn't utf8, as for
3001                                            example when it is entirely composed
3002                                            of hex constants */
3003     bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
3004     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
3005                                            number of characters found so far
3006                                            that will expand (into 2 bytes)
3007                                            should we have to convert to
3008                                            UTF-8) */
3009     SV *res;                            /* result from charnames */
3010     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
3011                                    high-end character is temporarily placed */
3012
3013     /* Does something require special handling in tr/// ?  This avoids extra
3014      * work in a less likely case.  As such, khw didn't feel it was worth
3015      * adding any branches to the more mainline code to handle this, which
3016      * means that this doesn't get set in some circumstances when things like
3017      * \x{100} get expanded out.  As a result there needs to be extra testing
3018      * done in the tr code */
3019     bool has_above_latin1 = FALSE;
3020
3021     /* Note on sizing:  The scanned constant is placed into sv, which is
3022      * initialized by newSV() assuming one byte of output for every byte of
3023      * input.  This routine expects newSV() to allocate an extra byte for a
3024      * trailing NUL, which this routine will append if it gets to the end of
3025      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3026      * CAPITAL LETTER A}), or more output than input if the constant ends up
3027      * recoded to utf8, but each time a construct is found that might increase
3028      * the needed size, SvGROW() is called.  Its size parameter each time is
3029      * based on the best guess estimate at the time, namely the length used so
3030      * far, plus the length the current construct will occupy, plus room for
3031      * the trailing NUL, plus one byte for every input byte still unscanned */
3032
3033     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3034                        before set */
3035 #ifdef EBCDIC
3036     int backslash_N = 0;            /* ? was the character from \N{} */
3037     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
3038                                        platform-specific like \x65 */
3039 #endif
3040
3041     PERL_ARGS_ASSERT_SCAN_CONST;
3042
3043     assert(PL_lex_inwhat != OP_TRANSR);
3044
3045     /* Protect sv from errors and fatal warnings. */
3046     ENTER_with_name("scan_const");
3047     SAVEFREESV(sv);
3048
3049     /* A bunch of code in the loop below assumes that if s[n] exists and is not
3050      * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
3051      * valid */
3052     assert(*send == '\0');
3053
3054     while (s < send
3055            || dorange   /* Handle tr/// range at right edge of input */
3056     ) {
3057
3058         /* get transliterations out of the way (they're most literal) */
3059         if (PL_lex_inwhat == OP_TRANS) {
3060
3061             /* But there isn't any special handling necessary unless there is a
3062              * range, so for most cases we just drop down and handle the value
3063              * as any other.  There are two exceptions.
3064              *
3065              * 1.  A hyphen indicates that we are actually going to have a
3066              *     range.  In this case, skip the '-', set a flag, then drop
3067              *     down to handle what should be the end range value.
3068              * 2.  After we've handled that value, the next time through, that
3069              *     flag is set and we fix up the range.
3070              *
3071              * Ranges entirely within Latin1 are expanded out entirely, in
3072              * order to make the transliteration a simple table look-up.
3073              * Ranges that extend above Latin1 have to be done differently, so
3074              * there is no advantage to expanding them here, so they are
3075              * stored here as Min, RANGE_INDICATOR, Max.  'RANGE_INDICATOR' is
3076              * a byte that can't occur in legal UTF-8, and hence can signify a
3077              * hyphen without any possible ambiguity.  On EBCDIC machines, if
3078              * the range is expressed as Unicode, the Latin1 portion is
3079              * expanded out even if the range extends above Latin1.  This is
3080              * because each code point in it has to be processed here
3081              * individually to get its native translation */
3082
3083             if (! dorange) {
3084
3085                 /* Here, we don't think we're in a range.  If the new character
3086                  * is not a hyphen; or if it is a hyphen, but it's too close to
3087                  * either edge to indicate a range, or if we haven't output any
3088                  * characters yet then it's a regular character. */
3089                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3090                 {
3091
3092                     /* A regular character.  Process like any other, but first
3093                      * clear any flags */
3094                     didrange = FALSE;
3095                     dorange = FALSE;
3096 #ifdef EBCDIC
3097                     non_portable_endpoint = 0;
3098                     backslash_N = 0;
3099 #endif
3100                     /* The tests here for being above Latin1 and similar ones
3101                      * in the following 'else' suffice to find all such
3102                      * occurences in the constant, except those added by a
3103                      * backslash escape sequence, like \x{100}.  Mostly, those
3104                      * set 'has_above_latin1' as appropriate */
3105                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3106                         has_above_latin1 = TRUE;
3107                     }
3108
3109                     /* Drops down to generic code to process current byte */
3110                 }
3111                 else {  /* Is a '-' in the context where it means a range */
3112                     if (didrange) { /* Something like y/A-C-Z// */
3113                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3114                                          " operator");
3115                     }
3116
3117                     dorange = TRUE;
3118
3119                     s++;    /* Skip past the hyphen */
3120
3121                     /* d now points to where the end-range character will be
3122                      * placed.  Drop down to get that character.  We'll finish
3123                      * processing the range the next time through the loop */
3124
3125                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3126                         has_above_latin1 = TRUE;
3127                     }
3128
3129                     /* Drops down to generic code to process current byte */
3130                 }
3131             }  /* End of not a range */
3132             else {
3133                 /* Here we have parsed a range.  Now must handle it.  At this
3134                  * point:
3135                  * 'sv' is a SV* that contains the output string we are
3136                  *      constructing.  The final two characters in that string
3137                  *      are the range start and range end, in order.
3138                  * 'd'  points to just beyond the range end in the 'sv' string,
3139                  *      where we would next place something
3140                  */
3141                 char * max_ptr;
3142                 char * min_ptr;
3143                 IV range_min;
3144                 IV range_max;   /* last character in range */
3145                 STRLEN grow;
3146                 Size_t offset_to_min = 0;
3147                 Size_t extras = 0;
3148 #ifdef EBCDIC
3149                 bool convert_unicode;
3150                 IV real_range_max = 0;
3151 #endif
3152                 /* Get the code point values of the range ends. */
3153                 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3154                 offset_to_max = max_ptr - SvPVX_const(sv);
3155                 if (d_is_utf8) {
3156                     /* We know the utf8 is valid, because we just constructed
3157                      * it ourselves in previous loop iterations */
3158                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3159                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3160                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3161
3162                     /* This compensates for not all code setting
3163                      * 'has_above_latin1', so that we don't skip stuff that
3164                      * should be executed */
3165                     if (range_max > 255) {
3166                         has_above_latin1 = TRUE;
3167                     }
3168                 }
3169                 else {
3170                     min_ptr = max_ptr - 1;
3171                     range_min = * (U8*) min_ptr;
3172                     range_max = * (U8*) max_ptr;
3173                 }
3174
3175                 /* If the range is just a single code point, like tr/a-a/.../,
3176                  * that code point is already in the output, twice.  We can
3177                  * just back up over the second instance and avoid all the rest
3178                  * of the work.  But if it is a variant character, it's been
3179                  * counted twice, so decrement.  (This unlikely scenario is
3180                  * special cased, like the one for a range of 2 code points
3181                  * below, only because the main-line code below needs a range
3182                  * of 3 or more to work without special casing.  Might as well
3183                  * get it out of the way now.) */
3184                 if (UNLIKELY(range_max == range_min)) {
3185                     d = max_ptr;
3186                     if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3187                         utf8_variant_count--;
3188                     }
3189                     goto range_done;
3190                 }
3191
3192 #ifdef EBCDIC
3193                 /* On EBCDIC platforms, we may have to deal with portable
3194                  * ranges.  These happen if at least one range endpoint is a
3195                  * Unicode value (\N{...}), or if the range is a subset of
3196                  * [A-Z] or [a-z], and both ends are literal characters,
3197                  * like 'A', and not like \x{C1} */
3198                 convert_unicode =
3199                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3200                                                        hence portable range */
3201                     || (     ! non_portable_endpoint
3202                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3203                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3204                 if (convert_unicode) {
3205
3206                     /* Special handling is needed for these portable ranges.
3207                      * They are defined to be in Unicode terms, which includes
3208                      * all the Unicode code points between the end points.
3209                      * Convert to Unicode to get the Unicode range.  Later we
3210                      * will convert each code point in the range back to
3211                      * native.  */
3212                     range_min = NATIVE_TO_UNI(range_min);
3213                     range_max = NATIVE_TO_UNI(range_max);
3214                 }
3215 #endif
3216
3217                 if (range_min > range_max) {
3218 #ifdef EBCDIC
3219                     if (convert_unicode) {
3220                         /* Need to convert back to native for meaningful
3221                          * messages for this platform */
3222                         range_min = UNI_TO_NATIVE(range_min);
3223                         range_max = UNI_TO_NATIVE(range_max);
3224                     }
3225 #endif
3226                     /* Use the characters themselves for the error message if
3227                      * ASCII printables; otherwise some visible representation
3228                      * of them */
3229                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3230                         Perl_croak(aTHX_
3231                          "Invalid range \"%c-%c\" in transliteration operator",
3232                          (char)range_min, (char)range_max);
3233                     }
3234 #ifdef EBCDIC
3235                     else if (convert_unicode) {
3236         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3237                         Perl_croak(aTHX_
3238                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3239                            UVXf "}\" in transliteration operator",
3240                            range_min, range_max);
3241                     }
3242 #endif
3243                     else {
3244         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3245                         Perl_croak(aTHX_
3246                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3247                            " in transliteration operator",
3248                            range_min, range_max);
3249                     }
3250                 }
3251
3252                 /* If the range is exactly two code points long, they are
3253                  * already both in the output */
3254                 if (UNLIKELY(range_min + 1 == range_max)) {
3255                     goto range_done;
3256                 }
3257
3258                 /* Here the range contains at least 3 code points */
3259
3260                 if (d_is_utf8) {
3261
3262                     /* If everything in the transliteration is below 256, we
3263                      * can avoid special handling later.  A translation table
3264                      * for each of those bytes is created by op.c.  So we
3265                      * expand out all ranges to their constituent code points.
3266                      * But if we've encountered something above 255, the
3267                      * expanding won't help, so skip doing that.  But if it's
3268                      * EBCDIC, we may have to look at each character below 256
3269                      * if we have to convert to/from Unicode values */
3270                     if (   has_above_latin1
3271 #ifdef EBCDIC
3272                         && (range_min > 255 || ! convert_unicode)
3273 #endif
3274                     ) {
3275                         const STRLEN off = d - SvPVX(sv);
3276                         const STRLEN extra = 1 + (send - s) + 1;
3277                         char *e;
3278
3279                         /* Move the high character one byte to the right; then
3280                          * insert between it and the range begin, an illegal
3281                          * byte which serves to indicate this is a range (using
3282                          * a '-' would be ambiguous). */
3283
3284                         if (off + extra > SvLEN(sv)) {
3285                             d = off + SvGROW(sv, off + extra);
3286                             max_ptr = d - off + offset_to_max;
3287                         }
3288
3289                         e = d++;
3290                         while (e-- > max_ptr) {
3291                             *(e + 1) = *e;
3292                         }
3293                         *(e + 1) = (char) RANGE_INDICATOR;
3294                         goto range_done;
3295                     }
3296
3297                     /* Here, we're going to expand out the range.  For EBCDIC
3298                      * the range can extend above 255 (not so in ASCII), so
3299                      * for EBCDIC, split it into the parts above and below
3300                      * 255/256 */
3301 #ifdef EBCDIC
3302                     if (range_max > 255) {
3303                         real_range_max = range_max;
3304                         range_max = 255;
3305                     }
3306 #endif
3307                 }
3308
3309                 /* Here we need to expand out the string to contain each
3310                  * character in the range.  Grow the output to handle this.
3311                  * For non-UTF8, we need a byte for each code point in the
3312                  * range, minus the three that we've already allocated for: the
3313                  * hyphen, the min, and the max.  For UTF-8, we need this
3314                  * plus an extra byte for each code point that occupies two
3315                  * bytes (is variant) when in UTF-8 (except we've already
3316                  * allocated for the end points, including if they are
3317                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3318                  * platforms, it's easy to calculate a precise number.  To
3319                  * start, we count the variants in the range, which we need
3320                  * elsewhere in this function anyway.  (For the case where it
3321                  * isn't easy to calculate, 'extras' has been initialized to 0,
3322                  * and the calculation is done in a loop further down.) */
3323 #ifdef EBCDIC
3324                 if (convert_unicode)
3325 #endif
3326                 {
3327                     /* This is executed unconditionally on ASCII, and for
3328                      * Unicode ranges on EBCDIC.  Under these conditions, all
3329                      * code points above a certain value are variant; and none
3330                      * under that value are.  We just need to find out how much
3331                      * of the range is above that value.  We don't count the
3332                      * end points here, as they will already have been counted
3333                      * as they were parsed. */
3334                     if (range_min >= UTF_CONTINUATION_MARK) {
3335
3336                         /* The whole range is made up of variants */
3337                         extras = (range_max - 1) - (range_min + 1) + 1;
3338                     }
3339                     else if (range_max >= UTF_CONTINUATION_MARK) {
3340
3341                         /* Only the higher portion of the range is variants */
3342                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3343                     }
3344
3345                     utf8_variant_count += extras;
3346                 }
3347
3348                 /* The base growth is the number of code points in the range,
3349                  * not including the endpoints, which have already been sized
3350                  * for (and output).  We don't subtract for the hyphen, as it
3351                  * has been parsed but not output, and the SvGROW below is
3352                  * based only on what's been output plus what's left to parse.
3353                  * */
3354                 grow = (range_max - 1) - (range_min + 1) + 1;
3355
3356                 if (d_is_utf8) {
3357 #ifdef EBCDIC
3358                     /* In some cases in EBCDIC, we haven't yet calculated a
3359                      * precise amount needed for the UTF-8 variants.  Just
3360                      * assume the worst case, that everything will expand by a
3361                      * byte */
3362                     if (! convert_unicode) {
3363                         grow *= 2;
3364                     }
3365                     else
3366 #endif
3367                     {
3368                         /* Otherwise we know exactly how many variants there
3369                          * are in the range. */
3370                         grow += extras;
3371                     }
3372                 }
3373
3374                 /* Grow, but position the output to overwrite the range min end
3375                  * point, because in some cases we overwrite that */
3376                 SvCUR_set(sv, d - SvPVX_const(sv));
3377                 offset_to_min = min_ptr - SvPVX_const(sv);
3378
3379                 /* See Note on sizing above. */
3380                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3381                                              + (send - s)
3382                                              + grow
3383                                              + 1 /* Trailing NUL */ );
3384
3385                 /* Now, we can expand out the range. */
3386 #ifdef EBCDIC
3387                 if (convert_unicode) {
3388                     SSize_t i;
3389
3390                     /* Recall that the min and max are now in Unicode terms, so
3391                      * we have to convert each character to its native
3392                      * equivalent */
3393                     if (d_is_utf8) {
3394                         for (i = range_min; i <= range_max; i++) {
3395                             append_utf8_from_native_byte(
3396                                                     LATIN1_TO_NATIVE((U8) i),
3397                                                     (U8 **) &d);
3398                         }
3399                     }
3400                     else {
3401                         for (i = range_min; i <= range_max; i++) {
3402                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3403                         }
3404                     }
3405                 }
3406                 else
3407 #endif
3408                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3409                 {
3410                     /* Here, no conversions are necessary, which means that the
3411                      * first character in the range is already in 'd' and
3412                      * valid, so we can skip overwriting it */
3413                     if (d_is_utf8) {
3414                         SSize_t i;
3415                         d += UTF8SKIP(d);
3416                         for (i = range_min + 1; i <= range_max; i++) {
3417                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3418                         }
3419                     }
3420                     else {
3421                         SSize_t i;
3422                         d++;
3423                         assert(range_min + 1 <= range_max);
3424                         for (i = range_min + 1; i < range_max; i++) {
3425 #ifdef EBCDIC
3426                             /* In this case on EBCDIC, we haven't calculated
3427                              * the variants.  Do it here, as we go along */
3428                             if (! UVCHR_IS_INVARIANT(i)) {
3429                                 utf8_variant_count++;
3430                             }
3431 #endif
3432                             *d++ = (char)i;
3433                         }
3434
3435                         /* The range_max is done outside the loop so as to
3436                          * avoid having to special case not incrementing
3437                          * 'utf8_variant_count' on EBCDIC (it's already been
3438                          * counted when originally parsed) */
3439                         *d++ = (char) range_max;
3440                     }
3441                 }
3442
3443 #ifdef EBCDIC
3444                 /* If the original range extended above 255, add in that
3445                  * portion. */
3446                 if (real_range_max) {
3447                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3448                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3449                     if (real_range_max > 0x100) {
3450                         if (real_range_max > 0x101) {
3451                             *d++ = (char) RANGE_INDICATOR;
3452                         }
3453                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3454                     }
3455                 }
3456 #endif
3457
3458               range_done:
3459                 /* mark the range as done, and continue */
3460                 didrange = TRUE;
3461                 dorange = FALSE;
3462 #ifdef EBCDIC
3463                 non_portable_endpoint = 0;
3464                 backslash_N = 0;
3465 #endif
3466                 continue;
3467             } /* End of is a range */
3468         } /* End of transliteration.  Joins main code after these else's */
3469         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3470             char *s1 = s-1;
3471             int esc = 0;
3472             while (s1 >= start && *s1-- == '\\')
3473                 esc = !esc;
3474             if (!esc)
3475                 in_charclass = TRUE;
3476         }
3477         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3478             char *s1 = s-1;
3479             int esc = 0;
3480             while (s1 >= start && *s1-- == '\\')
3481                 esc = !esc;
3482             if (!esc)
3483                 in_charclass = FALSE;
3484         }
3485             /* skip for regexp comments /(?#comment)/, except for the last
3486              * char, which will be done separately.  Stop on (?{..}) and
3487              * friends */
3488         else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3489             if (s[2] == '#') {
3490                 if (s_is_utf8) {
3491                     PERL_UINT_FAST8_T  len = UTF8SKIP(s);
3492
3493                     while (s + len < send && *s != ')') {
3494                         Copy(s, d, len, U8);
3495                         d += len;
3496                         s += len;
3497                         len = UTF8_SAFE_SKIP(s, send);
3498                     }
3499                 }
3500                 else while (s+1 < send && *s != ')') {
3501                     *d++ = *s++;
3502                 }
3503             }
3504             else if (!PL_lex_casemods
3505                      && (    s[2] == '{' /* This should match regcomp.c */
3506                          || (s[2] == '?' && s[3] == '{')))
3507             {
3508                 break;
3509             }
3510         }
3511             /* likewise skip #-initiated comments in //x patterns */
3512         else if (*s == '#'
3513                  && PL_lex_inpat
3514                  && !in_charclass
3515                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3516         {
3517             while (s < send && *s != '\n')
3518                 *d++ = *s++;
3519         }
3520             /* no further processing of single-quoted regex */
3521         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3522             goto default_action;
3523
3524             /* check for embedded arrays
3525              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3526              */
3527         else if (*s == '@' && s[1]) {
3528             if (UTF
3529                ? isIDFIRST_utf8_safe(s+1, send)
3530                : isWORDCHAR_A(s[1]))
3531             {
3532                 break;
3533             }
3534             if (memCHRs(":'{$", s[1]))
3535                 break;
3536             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3537                 break; /* in regexp, neither @+ nor @- are interpolated */
3538         }
3539             /* check for embedded scalars.  only stop if we're sure it's a
3540              * variable.  */
3541         else if (*s == '$') {
3542             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3543                 break;
3544             if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3545                 if (s[1] == '\\') {
3546                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3547                                    "Possible unintended interpolation of $\\ in regex");
3548                 }
3549                 break;          /* in regexp, $ might be tail anchor */
3550             }
3551         }
3552
3553         /* End of else if chain - OP_TRANS rejoin rest */
3554
3555         if (UNLIKELY(s >= send)) {
3556             assert(s == send);
3557             break;
3558         }
3559
3560         /* backslashes */
3561         if (*s == '\\' && s+1 < send) {
3562             char* e;    /* Can be used for ending '}', etc. */
3563
3564             s++;
3565
3566             /* warn on \1 - \9 in substitution replacements, but note that \11
3567              * is an octal; and \19 is \1 followed by '9' */
3568             if (PL_lex_inwhat == OP_SUBST
3569                 && !PL_lex_inpat
3570                 && isDIGIT(*s)
3571                 && *s != '0'
3572                 && !isDIGIT(s[1]))
3573             {
3574                 /* diag_listed_as: \%d better written as $%d */
3575                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3576                 *--s = '$';
3577                 break;
3578             }
3579
3580             /* string-change backslash escapes */
3581             if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3582                 --s;
3583                 break;
3584             }
3585             /* In a pattern, process \N, but skip any other backslash escapes.
3586              * This is because we don't want to translate an escape sequence
3587              * into a meta symbol and have the regex compiler use the meta
3588              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3589              * in spite of this, we do have to process \N here while the proper
3590              * charnames handler is in scope.  See bugs #56444 and #62056.
3591              *
3592              * There is a complication because \N in a pattern may also stand
3593              * for 'match a non-nl', and not mean a charname, in which case its
3594              * processing should be deferred to the regex compiler.  To be a
3595              * charname it must be followed immediately by a '{', and not look
3596              * like \N followed by a curly quantifier, i.e., not something like
3597              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3598              * quantifier */
3599             else if (PL_lex_inpat
3600                     && (*s != 'N'
3601                         || s[1] != '{'
3602                         || regcurly(s + 1)))
3603             {
3604                 *d++ = '\\';
3605                 goto default_action;
3606             }
3607
3608             switch (*s) {
3609             default:
3610                 {
3611                     if ((isALPHANUMERIC(*s)))
3612                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3613                                        "Unrecognized escape \\%c passed through",
3614                                        *s);
3615                     /* default action is to copy the quoted character */
3616                     goto default_action;
3617                 }
3618
3619             /* eg. \132 indicates the octal constant 0132 */
3620             case '0': case '1': case '2': case '3':
3621             case '4': case '5': case '6': case '7':
3622                 {
3623                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3624                               | PERL_SCAN_NOTIFY_ILLDIGIT;
3625                     STRLEN len = 3;
3626                     uv = grok_oct(s, &len, &flags, NULL);
3627                     s += len;
3628                     if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3629                         && s < send
3630                         && isDIGIT(*s)  /* like \08, \178 */
3631                         && ckWARN(WARN_MISC))
3632                     {
3633                         Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3634                             form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3635                     }
3636                 }
3637                 goto NUM_ESCAPE_INSERT;
3638
3639             /* eg. \o{24} indicates the octal constant \024 */
3640             case 'o':
3641                 {
3642                     const char* error;
3643
3644                     if (! grok_bslash_o(&s, send,
3645                                                &uv, &error,
3646                                                NULL,
3647                                                FALSE, /* Not strict */
3648                                                FALSE, /* No illegal cp's */
3649                                                UTF))
3650                     {
3651                         yyerror(error);
3652                         uv = 0; /* drop through to ensure range ends are set */
3653                     }
3654                     goto NUM_ESCAPE_INSERT;
3655                 }
3656
3657             /* eg. \x24 indicates the hex constant 0x24 */
3658             case 'x':
3659                 {
3660                     const char* error;
3661
3662                     if (! grok_bslash_x(&s, send,
3663                                                &uv, &error,
3664                                                NULL,
3665                                                FALSE, /* Not strict */
3666                                                FALSE, /* No illegal cp's */
3667                                                UTF))
3668                     {
3669                         yyerror(error);
3670                         uv = 0; /* drop through to ensure range ends are set */
3671                     }
3672                 }
3673
3674               NUM_ESCAPE_INSERT:
3675                 /* Insert oct or hex escaped character. */
3676
3677                 /* Here uv is the ordinal of the next character being added */
3678                 if (UVCHR_IS_INVARIANT(uv)) {
3679                     *d++ = (char) uv;
3680                 }
3681                 else {
3682                     if (!d_is_utf8 && uv > 255) {
3683
3684                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3685                          * If we've only seen invariants so far, all we have to
3686                          * do is turn on the flag */
3687                         if (utf8_variant_count == 0) {
3688                             SvUTF8_on(sv);
3689                         }
3690                         else {
3691                             SvCUR_set(sv, d - SvPVX_const(sv));
3692                             SvPOK_on(sv);
3693                             *d = '\0';
3694
3695                             sv_utf8_upgrade_flags_grow(
3696                                            sv,
3697                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3698
3699                                            /* Since we're having to grow here,
3700                                             * make sure we have enough room for
3701                                             * this escape and a NUL, so the
3702                                             * code immediately below won't have
3703                                             * to actually grow again */
3704                                           UVCHR_SKIP(uv)
3705                                         + (STRLEN)(send - s) + 1);
3706                             d = SvPVX(sv) + SvCUR(sv);
3707                         }
3708
3709                         has_above_latin1 = TRUE;
3710                         d_is_utf8 = TRUE;
3711                     }
3712
3713                     if (! d_is_utf8) {
3714                         *d++ = (char)uv;
3715                         utf8_variant_count++;
3716                     }
3717                     else {
3718                        /* Usually, there will already be enough room in 'sv'
3719                         * since such escapes are likely longer than any UTF-8
3720                         * sequence they can end up as.  This isn't the case on
3721                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3722                         * UTF-8 for it contains 14.  And, we have to allow for
3723                         * a trailing NUL.  It probably can't happen on ASCII
3724                         * platforms, but be safe.  See Note on sizing above. */
3725                         const STRLEN needed = d - SvPVX(sv)
3726                                             + UVCHR_SKIP(uv)
3727                                             + (send - s)
3728                                             + 1;
3729                         if (UNLIKELY(needed > SvLEN(sv))) {
3730                             SvCUR_set(sv, d - SvPVX_const(sv));
3731                             d = SvCUR(sv) + SvGROW(sv, needed);
3732                         }
3733
3734                         d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3735                                                    (ckWARN(WARN_PORTABLE))
3736                                                    ? UNICODE_WARN_PERL_EXTENDED
3737                                                    : 0);
3738                     }
3739                 }
3740 #ifdef EBCDIC
3741                 non_portable_endpoint++;
3742 #endif
3743                 continue;
3744
3745             case 'N':
3746                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3747                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3748                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3749                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3750                  * convenience all three forms are referred to as "named
3751                  * characters" below.
3752                  *
3753                  * For patterns, \N also can mean to match a non-newline.  Code
3754                  * before this 'switch' statement should already have handled
3755                  * this situation, and hence this code only has to deal with
3756                  * the named character cases.
3757                  *
3758                  * For non-patterns, the named characters are converted to
3759                  * their string equivalents.  In patterns, named characters are
3760                  * not converted to their ultimate forms for the same reasons
3761                  * that other escapes aren't (mainly that the ultimate
3762                  * character could be considered a meta-symbol by the regex
3763                  * compiler).  Instead, they are converted to the \N{U+...}
3764                  * form to get the value from the charnames that is in effect
3765                  * right now, while preserving the fact that it was a named
3766                  * character, so that the regex compiler knows this.
3767                  *
3768                  * The structure of this section of code (besides checking for
3769                  * errors and upgrading to utf8) is:
3770                  *    If the named character is of the form \N{U+...}, pass it
3771                  *      through if a pattern; otherwise convert the code point
3772                  *      to utf8
3773                  *    Otherwise must be some \N{NAME}: convert to
3774                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3775                  *
3776                  * Transliteration is an exception.  The conversion to utf8 is
3777                  * only done if the code point requires it to be representable.
3778                  *
3779                  * Here, 's' points to the 'N'; the test below is guaranteed to
3780                  * succeed if we are being called on a pattern, as we already
3781                  * know from a test above that the next character is a '{'.  A
3782                  * non-pattern \N must mean 'named character', which requires
3783                  * braces */
3784                 s++;
3785                 if (*s != '{') {
3786                     yyerror("Missing braces on \\N{}");
3787                     *d++ = '\0';
3788                     continue;
3789                 }
3790                 s++;
3791
3792                 /* If there is no matching '}', it is an error. */
3793                 if (! (e = (char *) memchr(s, '}', send - s))) {
3794                     if (! PL_lex_inpat) {
3795                         yyerror("Missing right brace on \\N{}");
3796                     } else {
3797                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3798                     }
3799                     yyquit(); /* Have exhausted the input. */
3800                 }
3801
3802                 /* Here it looks like a named character */
3803
3804                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3805                     s += 2;         /* Skip to next char after the 'U+' */
3806                     if (PL_lex_inpat) {
3807
3808                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3809                         /* Check the syntax.  */
3810                         const char *orig_s;
3811                         orig_s = s - 5;
3812                         if (!isXDIGIT(*s)) {
3813                           bad_NU:
3814                             yyerror(
3815                                 "Invalid hexadecimal number in \\N{U+...}"
3816                             );
3817                             s = e + 1;
3818                             *d++ = '\0';
3819                             continue;
3820                         }
3821                         while (++s < e) {
3822                             if (isXDIGIT(*s))
3823                                 continue;
3824                             else if ((*s == '.' || *s == '_')
3825                                   && isXDIGIT(s[1]))
3826                                 continue;
3827                             goto bad_NU;
3828                         }
3829
3830                         /* Pass everything through unchanged.
3831                          * +1 is for the '}' */
3832                         Copy(orig_s, d, e - orig_s + 1, char);
3833                         d += e - orig_s + 1;
3834                     }
3835                     else {  /* Not a pattern: convert the hex to string */
3836                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3837                                   | PERL_SCAN_SILENT_ILLDIGIT
3838                                   | PERL_SCAN_SILENT_OVERFLOW
3839                                   | PERL_SCAN_DISALLOW_PREFIX;
3840                         STRLEN len = e - s;
3841
3842                         uv = grok_hex(s, &len, &flags, NULL);
3843                         if (len == 0 || (len != (STRLEN)(e - s)))
3844                             goto bad_NU;
3845
3846                         if (    uv > MAX_LEGAL_CP
3847                             || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3848                         {
3849                             yyerror(form_cp_too_large_msg(16, s, len, 0));
3850                             uv = 0; /* drop through to ensure range ends are
3851                                        set */
3852                         }
3853
3854                          /* For non-tr///, if the destination is not in utf8,
3855                           * unconditionally recode it to be so.  This is
3856                           * because \N{} implies Unicode semantics, and scalars
3857                           * have to be in utf8 to guarantee those semantics.
3858                           * tr/// doesn't care about Unicode rules, so no need
3859                           * there to upgrade to UTF-8 for small enough code
3860                           * points */
3861                         if (! d_is_utf8 && (   uv > 0xFF
3862                                            || PL_lex_inwhat != OP_TRANS))
3863                         {
3864                             /* See Note on sizing above.  */
3865                             const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3866
3867                             SvCUR_set(sv, d - SvPVX_const(sv));
3868                             SvPOK_on(sv);
3869                             *d = '\0';
3870
3871                             if (utf8_variant_count == 0) {
3872                                 SvUTF8_on(sv);
3873                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3874                             }
3875                             else {
3876                                 sv_utf8_upgrade_flags_grow(
3877                                                sv,
3878                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3879                                                extra);
3880                                 d = SvPVX(sv) + SvCUR(sv);
3881                             }
3882
3883                             d_is_utf8 = TRUE;
3884                             has_above_latin1 = TRUE;
3885                         }
3886
3887                         /* Add the (Unicode) code point to the output. */
3888                         if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3889                             *d++ = (char) LATIN1_TO_NATIVE(uv);
3890                         }
3891                         else {
3892                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
3893                                                    (ckWARN(WARN_PORTABLE))
3894                                                    ? UNICODE_WARN_PERL_EXTENDED
3895                                                    : 0);
3896                         }
3897                     }
3898                 }
3899                 else /* Here is \N{NAME} but not \N{U+...}. */
3900                      if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
3901                 {   /* Failed.  We should die eventually, but for now use a NUL
3902                        to keep parsing */
3903                     *d++ = '\0';
3904                 }
3905                 else {  /* Successfully evaluated the name */
3906                     STRLEN len;
3907                     const char *str = SvPV_const(res, len);
3908                     if (PL_lex_inpat) {
3909
3910                         if (! len) { /* The name resolved to an empty string */
3911                             const char empty_N[] = "\\N{_}";
3912                             Copy(empty_N, d, sizeof(empty_N) - 1, char);
3913                             d += sizeof(empty_N) - 1;
3914                         }
3915                         else {
3916                             /* In order to not lose information for the regex
3917                             * compiler, pass the result in the specially made
3918                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3919                             * the code points in hex of each character
3920                             * returned by charnames */
3921
3922                             const char *str_end = str + len;
3923                             const STRLEN off = d - SvPVX_const(sv);
3924
3925                             if (! SvUTF8(res)) {
3926                                 /* For the non-UTF-8 case, we can determine the
3927                                  * exact length needed without having to parse
3928                                  * through the string.  Each character takes up
3929                                  * 2 hex digits plus either a trailing dot or
3930                                  * the "}" */
3931                                 const char initial_text[] = "\\N{U+";
3932                                 const STRLEN initial_len = sizeof(initial_text)
3933                                                            - 1;
3934                                 d = off + SvGROW(sv, off
3935                                                     + 3 * len
3936
3937                                                     /* +1 for trailing NUL */
3938                                                     + initial_len + 1
3939
3940                                                     + (STRLEN)(send - e));
3941                                 Copy(initial_text, d, initial_len, char);
3942                                 d += initial_len;
3943                                 while (str < str_end) {
3944                                     char hex_string[4];
3945                                     int len =
3946                                         my_snprintf(hex_string,
3947                                                   sizeof(hex_string),
3948                                                   "%02X.",
3949
3950                                                   /* The regex compiler is
3951                                                    * expecting Unicode, not
3952                                                    * native */
3953                                                   NATIVE_TO_LATIN1(*str));
3954                                     PERL_MY_SNPRINTF_POST_GUARD(len,
3955                                                            sizeof(hex_string));
3956                                     Copy(hex_string, d, 3, char);
3957                                     d += 3;
3958                                     str++;
3959                                 }
3960                                 d--;    /* Below, we will overwrite the final
3961                                            dot with a right brace */
3962                             }
3963                             else {
3964                                 STRLEN char_length; /* cur char's byte length */
3965
3966                                 /* and the number of bytes after this is
3967                                  * translated into hex digits */
3968                                 STRLEN output_length;
3969
3970                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3971                                  * for max('U+', '.'); and 1 for NUL */
3972                                 char hex_string[2 * UTF8_MAXBYTES + 5];
3973
3974                                 /* Get the first character of the result. */
3975                                 U32 uv = utf8n_to_uvchr((U8 *) str,
3976                                                         len,
3977                                                         &char_length,
3978                                                         UTF8_ALLOW_ANYUV);
3979                                 /* Convert first code point to Unicode hex,
3980                                  * including the boiler plate before it. */
3981                                 output_length =
3982                                     my_snprintf(hex_string, sizeof(hex_string),
3983                                              "\\N{U+%X",
3984                                              (unsigned int) NATIVE_TO_UNI(uv));
3985
3986                                 /* Make sure there is enough space to hold it */
3987                                 d = off + SvGROW(sv, off
3988                                                     + output_length
3989                                                     + (STRLEN)(send - e)
3990                                                     + 2);       /* '}' + NUL */
3991                                 /* And output it */
3992                                 Copy(hex_string, d, output_length, char);
3993                                 d += output_length;
3994
3995                                 /* For each subsequent character, append dot and
3996                                 * its Unicode code point in hex */
3997                                 while ((str += char_length) < str_end) {
3998                                     const STRLEN off = d - SvPVX_const(sv);
3999                                     U32 uv = utf8n_to_uvchr((U8 *) str,
4000                                                             str_end - str,
4001                                                             &char_length,
4002                                                             UTF8_ALLOW_ANYUV);
4003                                     output_length =
4004                                         my_snprintf(hex_string,
4005                                              sizeof(hex_string),
4006                                              ".%X",
4007                                              (unsigned int) NATIVE_TO_UNI(uv));
4008
4009                                     d = off + SvGROW(sv, off
4010                                                         + output_length
4011                                                         + (STRLEN)(send - e)
4012                                                         + 2);   /* '}' +  NUL */
4013                                     Copy(hex_string, d, output_length, char);
4014                                     d += output_length;
4015                                 }
4016                             }
4017
4018                             *d++ = '}'; /* Done.  Add the trailing brace */
4019                         }
4020                     }
4021                     else { /* Here, not in a pattern.  Convert the name to a
4022                             * string. */
4023
4024                         if (PL_lex_inwhat == OP_TRANS) {
4025                             str = SvPV_const(res, len);
4026                             if (len > ((SvUTF8(res))
4027                                        ? UTF8SKIP(str)
4028                                        : 1U))
4029                             {
4030                                 yyerror(Perl_form(aTHX_
4031                                     "%.*s must not be a named sequence"
4032                                     " in transliteration operator",
4033                                         /*  +1 to include the "}" */
4034                                     (int) (e + 1 - start), start));
4035                                 *d++ = '\0';
4036                                 goto end_backslash_N;
4037                             }
4038
4039                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4040                                 has_above_latin1 = TRUE;
4041                             }
4042
4043                         }
4044                         else if (! SvUTF8(res)) {
4045                             /* Make sure \N{} return is UTF-8.  This is because
4046                              * \N{} implies Unicode semantics, and scalars have
4047                              * to be in utf8 to guarantee those semantics; but
4048                              * not needed in tr/// */
4049                             sv_utf8_upgrade_flags(res, 0);
4050                             str = SvPV_const(res, len);
4051                         }
4052
4053                          /* Upgrade destination to be utf8 if this new
4054                           * component is */
4055                         if (! d_is_utf8 && SvUTF8(res)) {
4056                             /* See Note on sizing above.  */
4057                             const STRLEN extra = len + (send - s) + 1;
4058
4059                             SvCUR_set(sv, d - SvPVX_const(sv));
4060                             SvPOK_on(sv);
4061                             *d = '\0';
4062
4063                             if (utf8_variant_count == 0) {
4064                                 SvUTF8_on(sv);
4065                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4066                             }
4067                             else {
4068                                 sv_utf8_upgrade_flags_grow(sv,
4069                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4070                                                 extra);
4071                                 d = SvPVX(sv) + SvCUR(sv);
4072                             }
4073                             d_is_utf8 = TRUE;
4074                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
4075
4076                             /* See Note on sizing above.  (NOTE: SvCUR() is not
4077                              * set correctly here). */
4078                             const STRLEN extra = len + (send - e) + 1;
4079                             const STRLEN off = d - SvPVX_const(sv);
4080                             d = off + SvGROW(sv, off + extra);
4081                         }
4082                         Copy(str, d, len, char);
4083                         d += len;
4084                     }
4085
4086                     SvREFCNT_dec(res);
4087
4088                 } /* End \N{NAME} */
4089
4090               end_backslash_N:
4091 #ifdef EBCDIC
4092                 backslash_N++; /* \N{} is defined to be Unicode */
4093 #endif
4094                 s = e + 1;  /* Point to just after the '}' */
4095                 continue;
4096
4097             /* \c is a control character */
4098             case 'c':
4099                 s++;
4100                 if (s < send) {
4101                     const char * message;
4102
4103                     if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4104                         yyerror(message);
4105                         yyquit();   /* Have always immediately croaked on
4106                                        errors in this */
4107                     }
4108                     d++;
4109                 }
4110                 else {
4111                     yyerror("Missing control char name in \\c");
4112                     yyquit();   /* Are at end of input, no sense continuing */
4113                 }
4114 #ifdef EBCDIC
4115                 non_portable_endpoint++;
4116 #endif
4117                 break;
4118
4119             /* printf-style backslashes, formfeeds, newlines, etc */
4120             case 'b':
4121                 *d++ = '\b';
4122                 break;
4123             case 'n':
4124                 *d++ = '\n';
4125                 break;
4126             case 'r':
4127                 *d++ = '\r';
4128                 break;
4129             case 'f':
4130                 *d++ = '\f';
4131                 break;
4132             case 't':
4133                 *d++ = '\t';
4134                 break;
4135             case 'e':
4136                 *d++ = ESC_NATIVE;
4137                 break;
4138             case 'a':
4139                 *d++ = '\a';
4140                 break;
4141             } /* end switch */
4142
4143             s++;
4144             continue;
4145         } /* end if (backslash) */
4146
4147     default_action:
4148         /* Just copy the input to the output, though we may have to convert
4149          * to/from UTF-8.
4150          *
4151          * If the input has the same representation in UTF-8 as not, it will be
4152          * a single byte, and we don't care about UTF8ness; just copy the byte */
4153         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4154             *d++ = *s++;
4155         }
4156         else if (! s_is_utf8 && ! d_is_utf8) {
4157             /* If neither source nor output is UTF-8, is also a single byte,
4158              * just copy it; but this byte counts should we later have to
4159              * convert to UTF-8 */
4160             *d++ = *s++;
4161             utf8_variant_count++;
4162         }
4163         else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4164             const STRLEN len = UTF8SKIP(s);
4165
4166             /* We expect the source to have already been checked for
4167              * malformedness */
4168             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4169
4170             Copy(s, d, len, U8);
4171             d += len;
4172             s += len;
4173         }
4174         else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4175             STRLEN need = send - s + 1; /* See Note on sizing above. */
4176
4177             SvCUR_set(sv, d - SvPVX_const(sv));
4178             SvPOK_on(sv);
4179             *d = '\0';
4180
4181             if (utf8_variant_count == 0) {
4182                 SvUTF8_on(sv);
4183                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4184             }
4185             else {
4186                 sv_utf8_upgrade_flags_grow(sv,
4187                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4188                                            need);
4189                 d = SvPVX(sv) + SvCUR(sv);
4190             }
4191             d_is_utf8 = TRUE;
4192             goto default_action; /* Redo, having upgraded so both are UTF-8 */
4193         }
4194         else {  /* UTF8ness matters: convert this non-UTF8 source char to
4195                    UTF-8 for output.  It will occupy 2 bytes, but don't include
4196                    the input byte since we haven't incremented 's' yet. See
4197                    Note on sizing above. */
4198             const STRLEN off = d - SvPVX(sv);
4199             const STRLEN extra = 2 + (send - s - 1) + 1;
4200             if (off + extra > SvLEN(sv)) {
4201                 d = off + SvGROW(sv, off + extra);
4202             }
4203             *d++ = UTF8_EIGHT_BIT_HI(*s);
4204             *d++ = UTF8_EIGHT_BIT_LO(*s);
4205             s++;
4206         }
4207     } /* while loop to process each character */
4208
4209     {
4210         const STRLEN off = d - SvPVX(sv);
4211
4212         /* See if room for the terminating NUL */
4213         if (UNLIKELY(off >= SvLEN(sv))) {
4214
4215 #ifndef DEBUGGING
4216
4217             if (off > SvLEN(sv))
4218 #endif
4219                 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4220                         " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4221
4222             /* Whew!  Here we don't have room for the terminating NUL, but
4223              * everything else so far has fit.  It's not too late to grow
4224              * to fit the NUL and continue on.  But it is a bug, as the code
4225              * above was supposed to have made room for this, so under
4226              * DEBUGGING builds, we panic anyway.  */
4227             d = off + SvGROW(sv, off + 1);
4228         }
4229     }
4230
4231     /* terminate the string and set up the sv */
4232     *d = '\0';
4233     SvCUR_set(sv, d - SvPVX_const(sv));
4234
4235     SvPOK_on(sv);
4236     if (d_is_utf8) {
4237         SvUTF8_on(sv);
4238     }
4239
4240     /* shrink the sv if we allocated more than we used */
4241     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4242         SvPV_shrink_to_cur(sv);
4243     }
4244
4245     /* return the substring (via pl_yylval) only if we parsed anything */
4246     if (s > start) {
4247         char *s2 = start;
4248         for (; s2 < s; s2++) {
4249             if (*s2 == '\n')
4250                 COPLINE_INC_WITH_HERELINES;
4251         }
4252         SvREFCNT_inc_simple_void_NN(sv);
4253         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4254             && ! PL_parser->lex_re_reparsing)
4255         {
4256             const char *const key = PL_lex_inpat ? "qr" : "q";
4257             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4258             const char *type;
4259             STRLEN typelen;
4260
4261             if (PL_lex_inwhat == OP_TRANS) {
4262                 type = "tr";
4263                 typelen = 2;
4264             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4265                 type = "s";
4266                 typelen = 1;
4267             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4268                 type = "q";
4269                 typelen = 1;
4270             } else {
4271                 type = "qq";
4272                 typelen = 2;
4273             }
4274
4275             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4276                                 type, typelen, NULL);
4277         }
4278         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4279     }
4280     LEAVE_with_name("scan_const");
4281     return s;
4282 }
4283
4284 /* S_intuit_more
4285  * Returns TRUE if there's more to the expression (e.g., a subscript),
4286  * FALSE otherwise.
4287  *
4288  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4289  *
4290  * ->[ and ->{ return TRUE
4291  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4292  * { and [ outside a pattern are always subscripts, so return TRUE
4293  * if we're outside a pattern and it's not { or [, then return FALSE
4294  * if we're in a pattern and the first char is a {
4295  *   {4,5} (any digits around the comma) returns FALSE
4296  * if we're in a pattern and the first char is a [
4297  *   [] returns FALSE
4298  *   [SOMETHING] has a funky algorithm to decide whether it's a
4299  *      character class or not.  It has to deal with things like
4300  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4301  * anything else returns TRUE
4302  */
4303
4304 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4305
4306 STATIC int
4307 S_intuit_more(pTHX_ char *s, char *e)
4308 {
4309     PERL_ARGS_ASSERT_INTUIT_MORE;
4310
4311     if (PL_lex_brackets)
4312         return TRUE;
4313     if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4314         return TRUE;
4315     if (*s == '-' && s[1] == '>'
4316      && FEATURE_POSTDEREF_QQ_IS_ENABLED
4317      && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4318         ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4319         return TRUE;
4320     if (*s != '{' && *s != '[')
4321         return FALSE;
4322     PL_parser->sub_no_recover = TRUE;
4323     if (!PL_lex_inpat)
4324         return TRUE;
4325
4326     /* In a pattern, so maybe we have {n,m}. */
4327     if (*s == '{') {
4328         if (regcurly(s)) {
4329             return FALSE;
4330         }
4331         return TRUE;
4332     }
4333
4334     /* On the other hand, maybe we have a character class */
4335
4336     s++;
4337     if (*s == ']' || *s == '^')
4338         return FALSE;
4339     else {
4340         /* this is terrifying, and it works */
4341         int weight;
4342         char seen[256];
4343         const char * const send = (char *) memchr(s, ']', e - s);
4344         unsigned char un_char, last_un_char;
4345         char tmpbuf[sizeof PL_tokenbuf * 4];
4346
4347         if (!send)              /* has to be an expression */
4348             return TRUE;
4349         weight = 2;             /* let's weigh the evidence */
4350
4351         if (*s == '$')
4352             weight -= 3;
4353         else if (isDIGIT(*s)) {
4354             if (s[1] != ']') {
4355                 if (isDIGIT(s[1]) && s[2] == ']')
4356                     weight -= 10;
4357             }
4358             else
4359                 weight -= 100;
4360         }
4361         Zero(seen,256,char);
4362         un_char = 255;
4363         for (; s < send; s++) {
4364             last_un_char = un_char;
4365             un_char = (unsigned char)*s;
4366             switch (*s) {
4367             case '@':
4368             case '&':
4369             case '$':
4370                 weight -= seen[un_char] * 10;
4371                 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4372                     int len;
4373                     scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4374                     len = (int)strlen(tmpbuf);
4375                     if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4376                                                     UTF ? SVf_UTF8 : 0, SVt_PV))
4377                         weight -= 100;
4378                     else
4379                         weight -= 10;
4380                 }
4381                 else if (*s == '$'
4382                          && s[1]
4383                          && memCHRs("[#!%*<>()-=",s[1]))
4384                 {
4385                     if (/*{*/ memCHRs("])} =",s[2]))
4386                         weight -= 10;
4387                     else
4388                         weight -= 1;
4389                 }
4390                 break;
4391             case '\\':
4392                 un_char = 254;
4393                 if (s[1]) {
4394                     if (memCHRs("wds]",s[1]))
4395                         weight += 100;
4396                     else if (seen[(U8)'\''] || seen[(U8)'"'])
4397                         weight += 1;
4398                     else if (memCHRs("rnftbxcav",s[1]))
4399                         weight += 40;
4400                     else if (isDIGIT(s[1])) {
4401                         weight += 40;
4402                         while (s[1] && isDIGIT(s[1]))
4403                             s++;
4404                     }
4405                 }
4406                 else
4407                     weight += 100;
4408                 break;
4409             case '-':
4410                 if (s[1] == '\\')
4411                     weight += 50;
4412                 if (memCHRs("aA01! ",last_un_char))
4413                     weight += 30;
4414                 if (memCHRs("zZ79~",s[1]))
4415                     weight += 30;
4416                 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4417                     weight -= 5;        /* cope with negative subscript */
4418                 break;
4419             default:
4420                 if (!isWORDCHAR(last_un_char)
4421                     && !(last_un_char == '$' || last_un_char == '@'
4422                          || last_un_char == '&')
4423                     && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4424                     char *d = s;
4425                     while (isALPHA(*s))
4426                         s++;
4427                     if (keyword(d, s - d, 0))
4428                         weight -= 150;
4429                 }
4430                 if (un_char == last_un_char + 1)
4431                     weight += 5;
4432                 weight -= seen[un_char];
4433                 break;
4434             }
4435             seen[un_char]++;
4436         }
4437         if (weight >= 0)        /* probably a character class */
4438             return FALSE;
4439     }
4440
4441     return TRUE;
4442 }
4443
4444 /*
4445  * S_intuit_method
4446  *
4447  * Does all the checking to disambiguate
4448  *   foo bar
4449  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4450  * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4451  *
4452  * First argument is the stuff after the first token, e.g. "bar".
4453  *
4454  * Not a method if foo is a filehandle.
4455  * Not a method if foo is a subroutine prototyped to take a filehandle.
4456  * Not a method if it's really "Foo $bar"
4457  * Method if it's "foo $bar"
4458  * Not a method if it's really "print foo $bar"
4459  * Method if it's really "foo package::" (interpreted as package->foo)
4460  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4461  * Not a method if bar is a filehandle or package, but is quoted with
4462  *   =>
4463  */
4464
4465 STATIC int
4466 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4467 {
4468     char *s = start + (*start == '$');
4469     char tmpbuf[sizeof PL_tokenbuf];
4470     STRLEN len;
4471     GV* indirgv;
4472         /* Mustn't actually add anything to a symbol table.
4473            But also don't want to "initialise" any placeholder
4474            constants that might already be there into full
4475            blown PVGVs with attached PVCV.  */
4476     GV * const gv =
4477         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4478
4479     PERL_ARGS_ASSERT_INTUIT_METHOD;
4480
4481     if (!FEATURE_INDIRECT_IS_ENABLED)
4482         return 0;
4483
4484     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4485             return 0;
4486     if (cv && SvPOK(cv)) {
4487         const char *proto = CvPROTO(cv);
4488         if (proto) {
4489             while (*proto && (isSPACE(*proto) || *proto == ';'))
4490                 proto++;
4491             if (*proto == '*')
4492                 return 0;
4493         }
4494     }
4495
4496     if (*start == '$') {
4497         SSize_t start_off = start - SvPVX(PL_linestr);
4498         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4499             || isUPPER(*PL_tokenbuf))
4500             return 0;
4501         /* this could be $# */
4502         if (isSPACE(*s))
4503             s = skipspace(s);
4504         PL_bufptr = SvPVX(PL_linestr) + start_off;
4505         PL_expect = XREF;
4506         return *s == '(' ? FUNCMETH : METHOD;
4507     }
4508
4509     s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4510     /* start is the beginning of the possible filehandle/object,
4511      * and s is the end of it
4512      * tmpbuf is a copy of it (but with single quotes as double colons)
4513      */
4514
4515     if (!keyword(tmpbuf, len, 0)) {
4516         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4517             len -= 2;
4518             tmpbuf[len] = '\0';
4519             goto bare_package;
4520         }
4521         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4522                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4523                                     SVt_PVCV);
4524         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4525          && (!isGV(indirgv) || GvCVu(indirgv)))
4526             return 0;
4527         /* filehandle or package name makes it a method */
4528         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4529             s = skipspace(s);
4530             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4531                 return 0;       /* no assumptions -- "=>" quotes bareword */
4532       bare_package:
4533             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4534                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4535             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4536             PL_expect = XTERM;
4537             force_next(BAREWORD);
4538             PL_bufptr = s;
4539             return *s == '(' ? FUNCMETH : METHOD;
4540         }
4541     }
4542     return 0;
4543 }
4544
4545 /* Encoded script support. filter_add() effectively inserts a
4546  * 'pre-processing' function into the current source input stream.
4547  * Note that the filter function only applies to the current source file
4548  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4549  *
4550  * The datasv parameter (which may be NULL) can be used to pass
4551  * private data to this instance of the filter. The filter function
4552  * can recover the SV using the FILTER_DATA macro and use it to
4553  * store private buffers and state information.
4554  *
4555  * The supplied datasv parameter is upgraded to a PVIO type
4556  * and the IoDIRP/IoANY field is used to store the function pointer,
4557  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4558  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4559  * private use must be set using malloc'd pointers.
4560  */
4561
4562 SV *
4563 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4564 {
4565     if (!funcp)
4566         return NULL;
4567
4568     if (!PL_parser)
4569         return NULL;
4570
4571     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4572         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4573
4574     if (!PL_rsfp_filters)
4575         PL_rsfp_filters = newAV();
4576     if (!datasv)
4577         datasv = newSV(0);
4578     SvUPGRADE(datasv, SVt_PVIO);
4579     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4580     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4581     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4582                           FPTR2DPTR(void *, IoANY(datasv)),
4583                           SvPV_nolen(datasv)));
4584     av_unshift(PL_rsfp_filters, 1);
4585     av_store(PL_rsfp_filters, 0, datasv) ;
4586     if (
4587         !PL_parser->filtered
4588      && PL_parser->lex_flags & LEX_EVALBYTES
4589      && PL_bufptr < PL_bufend
4590     ) {
4591         const char *s = PL_bufptr;
4592         while (s < PL_bufend) {
4593             if (*s == '\n') {
4594                 SV *linestr = PL_parser->linestr;
4595                 char *buf = SvPVX(linestr);
4596                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4597                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4598                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4599                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4600                 STRLEN const last_uni_pos =
4601                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4602                 STRLEN const last_lop_pos =
4603                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4604                 av_push(PL_rsfp_filters, linestr);
4605                 PL_parser->linestr =
4606                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4607                 buf = SvPVX(PL_parser->linestr);
4608                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4609                 PL_parser->bufptr = buf + bufptr_pos;
4610                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4611                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4612                 PL_parser->linestart = buf + linestart_pos;
4613                 if (PL_parser->last_uni)
4614                     PL_parser->last_uni = buf + last_uni_pos;
4615                 if (PL_parser->last_lop)
4616                     PL_parser->last_lop = buf + last_lop_pos;
4617                 SvLEN_set(linestr, SvCUR(linestr));
4618                 SvCUR_set(linestr, s - SvPVX(linestr));
4619                 PL_parser->filtered = 1;
4620                 break;
4621             }
4622             s++;
4623         }
4624     }
4625     return(datasv);
4626 }
4627
4628
4629 /* Delete most recently added instance of this filter function. */
4630 void
4631 Perl_filter_del(pTHX_ filter_t funcp)
4632 {
4633     SV *datasv;
4634
4635     PERL_ARGS_ASSERT_FILTER_DEL;
4636
4637 #ifdef DEBUGGING
4638     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4639                           FPTR2DPTR(void*, funcp)));
4640 #endif
4641     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4642         return;
4643     /* if filter is on top of stack (usual case) just pop it off */
4644     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4645     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4646         sv_free(av_pop(PL_rsfp_filters));
4647
4648         return;
4649     }
4650     /* we need to search for the correct entry and clear it     */
4651     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4652 }
4653
4654
4655 /* Invoke the idxth filter function for the current rsfp.        */
4656 /* maxlen 0 = read one text line */
4657 I32
4658 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4659 {
4660     filter_t funcp;
4661     I32 ret;
4662     SV *datasv = NULL;
4663     /* This API is bad. It should have been using unsigned int for maxlen.
4664        Not sure if we want to change the API, but if not we should sanity
4665        check the value here.  */
4666     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4667
4668     PERL_ARGS_ASSERT_FILTER_READ;
4669
4670     if (!PL_parser || !PL_rsfp_filters)
4671         return -1;
4672     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4673         /* Provide a default input filter to make life easy.    */
4674         /* Note that we append to the line. This is handy.      */
4675         DEBUG_P(PerlIO_printf(Perl_debug_log,
4676                               "filter_read %d: from rsfp\n", idx));
4677         if (correct_length) {
4678             /* Want a block */
4679             int len ;
4680             const int old_len = SvCUR(buf_sv);
4681
4682             /* ensure buf_sv is large enough */
4683             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4684             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4685                                    correct_length)) <= 0) {
4686                 if (PerlIO_error(PL_rsfp))
4687                     return -1;          /* error */
4688                 else
4689                     return 0 ;          /* end of file */
4690             }
4691             SvCUR_set(buf_sv, old_len + len) ;
4692             SvPVX(buf_sv)[old_len + len] = '\0';
4693         } else {
4694             /* Want a line */
4695             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4696                 if (PerlIO_error(PL_rsfp))
4697                     return -1;          /* error */
4698                 else
4699                     return 0 ;          /* end of file */
4700             }
4701         }
4702         return SvCUR(buf_sv);
4703     }
4704     /* Skip this filter slot if filter has been deleted */
4705     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4706         DEBUG_P(PerlIO_printf(Perl_debug_log,
4707                               "filter_read %d: skipped (filter deleted)\n",
4708                               idx));
4709         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4710     }
4711     if (SvTYPE(datasv) != SVt_PVIO) {
4712         if (correct_length) {
4713             /* Want a block */
4714             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4715             if (!remainder) return 0; /* eof */
4716             if (correct_length > remainder) correct_length = remainder;
4717             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4718             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4719         } else {
4720             /* Want a line */
4721             const char *s = SvEND(datasv);
4722             const char *send = SvPVX(datasv) + SvLEN(datasv);
4723             while (s < send) {
4724                 if (*s == '\n') {
4725                     s++;
4726                     break;
4727                 }
4728                 s++;
4729             }
4730             if (s == send) return 0; /* eof */
4731             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4732             SvCUR_set(datasv, s-SvPVX(datasv));
4733         }
4734         return SvCUR(buf_sv);
4735     }
4736     /* Get function pointer hidden within datasv        */
4737     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4738     DEBUG_P(PerlIO_printf(Perl_debug_log,
4739                           "filter_read %d: via function %p (%s)\n",
4740                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
4741     /* Call function. The function is expected to       */
4742     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
4743     /* Return: <0:error, =0:eof, >0:not eof             */
4744     ENTER;
4745     save_scalar(PL_errgv);
4746     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4747     LEAVE;
4748     return ret;
4749 }
4750
4751 STATIC char *
4752 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4753 {
4754     PERL_ARGS_ASSERT_FILTER_GETS;
4755
4756 #ifdef PERL_CR_FILTER
4757     if (!PL_rsfp_filters) {
4758         filter_add(S_cr_textfilter,NULL);
4759     }
4760 #endif
4761     if (PL_rsfp_filters) {
4762         if (!append)
4763             SvCUR_set(sv, 0);   /* start with empty line        */
4764         if (FILTER_READ(0, sv, 0) > 0)
4765             return ( SvPVX(sv) ) ;
4766         else
4767             return NULL ;
4768     }
4769     else
4770         return (sv_gets(sv, PL_rsfp, append));
4771 }
4772
4773 STATIC HV *
4774 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4775 {
4776     GV *gv;
4777
4778     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4779
4780     if (memEQs(pkgname, len, "__PACKAGE__"))
4781         return PL_curstash;
4782
4783     if (len > 2
4784         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4785         && (gv = gv_fetchpvn_flags(pkgname,
4786                                    len,
4787                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4788     {
4789         return GvHV(gv);                        /* Foo:: */
4790     }
4791
4792     /* use constant CLASS => 'MyClass' */
4793     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4794     if (gv && GvCV(gv)) {
4795         SV * const sv = cv_const_sv(GvCV(gv));
4796         if (sv)
4797             return gv_stashsv(sv, 0);
4798     }
4799
4800     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4801 }
4802
4803
4804 STATIC char *
4805 S_tokenize_use(pTHX_ int is_use, char *s) {
4806     PERL_ARGS_ASSERT_TOKENIZE_USE;
4807
4808     if (PL_expect != XSTATE)
4809         /* diag_listed_as: "use" not allowed in expression */
4810         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4811                     is_use ? "use" : "no"));
4812     PL_expect = XTERM;
4813     s = skipspace(s);
4814     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4815         s = force_version(s, TRUE);
4816         if (*s == ';' || *s == '}'
4817                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4818             NEXTVAL_NEXTTOKE.opval = NULL;
4819             force_next(BAREWORD);
4820         }
4821         else if (*s == 'v') {
4822             s = force_word(s,BAREWORD,FALSE,TRUE);
4823             s = force_version(s, FALSE);
4824         }
4825     }
4826     else {
4827         s = force_word(s,BAREWORD,FALSE,TRUE);
4828         s = force_version(s, FALSE);
4829     }
4830     pl_yylval.ival = is_use;
4831     return s;
4832 }
4833 #ifdef DEBUGGING
4834     static const char* const exp_name[] =
4835         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4836           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4837           "SIGVAR", "TERMORDORDOR"
4838         };
4839 #endif
4840
4841 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4842 STATIC bool
4843 S_word_takes_any_delimiter(char *p, STRLEN len)
4844 {
4845     return (len == 1 && memCHRs("msyq", p[0]))
4846             || (len == 2
4847                 && ((p[0] == 't' && p[1] == 'r')
4848                     || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4849 }
4850
4851 static void
4852 S_check_scalar_slice(pTHX_ char *s)
4853 {
4854     s++;
4855     while (SPACE_OR_TAB(*s)) s++;
4856     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4857                                                              PL_bufend,
4858                                                              UTF))
4859     {
4860         return;
4861     }
4862     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4863            || (*s && memCHRs(" \t$#+-'\"", *s)))
4864     {
4865         s += UTF ? UTF8SKIP(s) : 1;
4866     }
4867     if (*s == '}' || *s == ']')
4868         pl_yylval.ival = OPpSLICEWARNING;
4869 }
4870
4871 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4872 static void
4873 S_lex_token_boundary(pTHX)
4874 {
4875     PL_oldoldbufptr = PL_oldbufptr;
4876     PL_oldbufptr = PL_bufptr;
4877 }
4878
4879 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4880 static char *
4881 S_vcs_conflict_marker(pTHX_ char *s)
4882 {
4883     lex_token_boundary();
4884     PL_bufptr = s;
4885     yyerror("Version control conflict marker");
4886     while (s < PL_bufend && *s != '\n')
4887         s++;
4888     return s;
4889 }
4890
4891 static int
4892 yyl_sigvar(pTHX_ char *s)
4893 {
4894     /* we expect the sigil and optional var name part of a
4895      * signature element here. Since a '$' is not necessarily
4896      * followed by a var name, handle it specially here; the general
4897      * yylex code would otherwise try to interpret whatever follows
4898      * as a var; e.g. ($, ...) would be seen as the var '$,'
4899      */
4900
4901     U8 sigil;
4902
4903     s = skipspace(s);
4904     sigil = *s++;
4905     PL_bufptr = s; /* for error reporting */
4906     switch (sigil) {
4907     case '$':
4908     case '@':
4909     case '%':
4910         /* spot stuff that looks like an prototype */
4911         if (memCHRs("$:@%&*;\\[]", *s)) {
4912             yyerror("Illegal character following sigil in a subroutine signature");
4913             break;
4914         }
4915         /* '$#' is banned, while '$ # comment' isn't */
4916         if (*s == '#') {
4917             yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
4918             break;
4919         }
4920         s = skipspace(s);
4921         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
4922             char *dest = PL_tokenbuf + 1;
4923             /* read var name, including sigil, into PL_tokenbuf */
4924             PL_tokenbuf[0] = sigil;
4925             parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
4926                 0, cBOOL(UTF), FALSE, FALSE);
4927             *dest = '\0';
4928             assert(PL_tokenbuf[1]); /* we have a variable name */
4929         }
4930         else {
4931             *PL_tokenbuf = 0;
4932             PL_in_my = 0;
4933         }
4934
4935         s = skipspace(s);
4936         /* parse the = for the default ourselves to avoid '+=' etc being accepted here
4937          * as the ASSIGNOP, and exclude other tokens that start with =
4938          */
4939         if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
4940             /* save now to report with the same context as we did when
4941              * all ASSIGNOPS were accepted */
4942             PL_oldbufptr = s;
4943
4944             ++s;
4945             NEXTVAL_NEXTTOKE.ival = 0;
4946             force_next(ASSIGNOP);
4947             PL_expect = XTERM;
4948         }
4949         else if (*s == ',' || *s == ')') {
4950             PL_expect = XOPERATOR;
4951         }
4952         else {
4953             /* make sure the context shows the unexpected character and
4954              * hopefully a bit more */
4955             if (*s) ++s;
4956             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4957                 s++;
4958             PL_bufptr = s; /* for error reporting */
4959             yyerror("Illegal operator following parameter in a subroutine signature");
4960             PL_in_my = 0;
4961         }
4962         if (*PL_tokenbuf) {
4963             NEXTVAL_NEXTTOKE.ival = sigil;
4964             force_next('p'); /* force a signature pending identifier */
4965         }
4966         break;
4967
4968     case ')':
4969         PL_expect = XBLOCK;
4970         break;
4971     case ',': /* handle ($a,,$b) */
4972         break;
4973
4974     default:
4975         PL_in_my = 0;
4976         yyerror("A signature parameter must start with '$', '@' or '%'");
4977         /* very crude error recovery: skip to likely next signature
4978          * element */
4979         while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4980             s++;
4981         break;
4982     }
4983
4984     TOKEN(sigil);
4985 }
4986
4987 static int
4988 yyl_dollar(pTHX_ char *s)
4989 {
4990     CLINE;
4991
4992     if (PL_expect == XPOSTDEREF) {
4993         if (s[1] == '#') {
4994             s++;
4995             POSTDEREF(DOLSHARP);
4996         }
4997         POSTDEREF('$');
4998     }
4999
5000     if (   s[1] == '#'
5001         && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5002             || memCHRs("{$:+-@", s[2])))
5003     {
5004         PL_tokenbuf[0] = '@';
5005         s = scan_ident(s + 1, PL_tokenbuf + 1,
5006                        sizeof PL_tokenbuf - 1, FALSE);
5007         if (PL_expect == XOPERATOR) {
5008             char *d = s;
5009             if (PL_bufptr > s) {
5010                 d = PL_bufptr-1;
5011                 PL_bufptr = PL_oldbufptr;
5012             }
5013             no_op("Array length", d);
5014         }
5015         if (!PL_tokenbuf[1])
5016             PREREF(DOLSHARP);
5017         PL_expect = XOPERATOR;
5018         force_ident_maybe_lex('#');
5019         TOKEN(DOLSHARP);
5020     }
5021
5022     PL_tokenbuf[0] = '$';
5023     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5024     if (PL_expect == XOPERATOR) {
5025         char *d = s;
5026         if (PL_bufptr > s) {
5027             d = PL_bufptr-1;
5028             PL_bufptr = PL_oldbufptr;
5029         }
5030         no_op("Scalar", d);
5031     }
5032     if (!PL_tokenbuf[1]) {
5033         if (s == PL_bufend)
5034             yyerror("Final $ should be \\$ or $name");
5035         PREREF('$');
5036     }
5037
5038     {
5039         const char tmp = *s;
5040         if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5041             s = skipspace(s);
5042
5043         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5044             && intuit_more(s, PL_bufend)) {
5045             if (*s == '[') {
5046                 PL_tokenbuf[0] = '@';
5047                 if (ckWARN(WARN_SYNTAX)) {
5048                     char *t = s+1;
5049
5050                     while ( t < PL_bufend ) {
5051                         if (isSPACE(*t)) {
5052                             do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5053                             /* consumed one or more space chars */
5054                         } else if (*t == '$' || *t == '@') {
5055                             /* could be more than one '$' like $$ref or @$ref */
5056                             do { t++; } while (t < PL_bufend && *t == '$');
5057
5058                             /* could be an abigail style identifier like $ foo */
5059                             while (t < PL_bufend && *t == ' ') t++;
5060
5061                             /* strip off the name of the var */
5062                             while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5063                                 t += UTF ? UTF8SKIP(t) : 1;
5064                             /* consumed a varname */
5065                         } else if (isDIGIT(*t)) {
5066                             /* deal with hex constants like 0x11 */
5067                             if (t[0] == '0' && t[1] == 'x') {
5068                                 t += 2;
5069                                 while (t < PL_bufend && isXDIGIT(*t)) t++;
5070                             } else {
5071                                 /* deal with decimal/octal constants like 1 and 0123 */
5072                                 do { t++; } while (isDIGIT(*t));
5073                                 if (t<PL_bufend && *t == '.') {
5074                                     do { t++; } while (isDIGIT(*t));
5075                                 }
5076                             }
5077                             /* consumed a number */
5078                         } else {
5079                             /* not a var nor a space nor a number */
5080                             break;
5081                         }
5082                     }
5083                     if (t < PL_bufend && *t++ == ',') {
5084                         PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5085                         while (t < PL_bufend && *t != ']')
5086                             t++;
5087                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5088                                     "Multidimensional syntax %" UTF8f " not supported",
5089                                     UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5090                     }
5091                 }
5092             }
5093             else if (*s == '{') {
5094                 char *t;
5095                 PL_tokenbuf[0] = '%';
5096                 if (    strEQ(PL_tokenbuf+1, "SIG")
5097                     && ckWARN(WARN_SYNTAX)
5098                     && (t = (char *) memchr(s, '}', PL_bufend - s))
5099                     && (t = (char *) memchr(t, '=', PL_bufend - t)))
5100                 {
5101                     char tmpbuf[sizeof PL_tokenbuf];
5102                     do {
5103                         t++;
5104                     } while (isSPACE(*t));
5105                     if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5106                         STRLEN len;
5107                         t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5108                                         &len);
5109                         while (isSPACE(*t))
5110                             t++;
5111                         if (  *t == ';'
5112                             && get_cvn_flags(tmpbuf, len, UTF
5113                                                             ? SVf_UTF8
5114                                                             : 0))
5115                         {
5116                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5117                                 "You need to quote \"%" UTF8f "\"",
5118                                     UTF8fARG(UTF, len, tmpbuf));
5119                         }
5120                     }
5121                 }
5122             }
5123         }
5124
5125         PL_expect = XOPERATOR;
5126         if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5127             const bool islop = (PL_last_lop == PL_oldoldbufptr);
5128             if (!islop || PL_last_lop_op == OP_GREPSTART)
5129                 PL_expect = XOPERATOR;
5130             else if (memCHRs("$@\"'`q", *s))
5131                 PL_expect = XTERM;              /* e.g. print $fh "foo" */
5132             else if (   memCHRs("&*<%", *s)
5133                      && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5134             {
5135                 PL_expect = XTERM;              /* e.g. print $fh &sub */
5136             }
5137             else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5138                 char tmpbuf[sizeof PL_tokenbuf];
5139                 int t2;
5140                 STRLEN len;
5141                 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5142                 if ((t2 = keyword(tmpbuf, len, 0))) {
5143                     /* binary operators exclude handle interpretations */
5144                     switch (t2) {
5145                     case -KEY_x:
5146                     case -KEY_eq:
5147                     case -KEY_ne:
5148                     case -KEY_gt:
5149                     case -KEY_lt:
5150                     case -KEY_ge:
5151                     case -KEY_le:
5152                     case -KEY_cmp:
5153                         break;
5154                     default:
5155                         PL_expect = XTERM;      /* e.g. print $fh length() */
5156                         break;
5157                     }
5158                 }
5159                 else {
5160                     PL_expect = XTERM;  /* e.g. print $fh subr() */
5161                 }
5162             }
5163             else if (isDIGIT(*s))
5164                 PL_expect = XTERM;              /* e.g. print $fh 3 */
5165             else if (*s == '.' && isDIGIT(s[1]))
5166                 PL_expect = XTERM;              /* e.g. print $fh .3 */
5167             else if ((*s == '?' || *s == '-' || *s == '+')
5168                      && !isSPACE(s[1]) && s[1] != '=')
5169                 PL_expect = XTERM;              /* e.g. print $fh -1 */
5170             else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5171                      && s[1] != '/')
5172                 PL_expect = XTERM;              /* e.g. print $fh /.../
5173                                                XXX except DORDOR operator
5174                                             */
5175             else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5176                      && s[2] != '=')
5177                 PL_expect = XTERM;              /* print $fh <<"EOF" */
5178         }
5179     }
5180     force_ident_maybe_lex('$');
5181     TOKEN('$');
5182 }
5183
5184 static int
5185 yyl_sub(pTHX_ char *s, const int key)
5186 {
5187     char * const tmpbuf = PL_tokenbuf + 1;
5188     bool have_name, have_proto;
5189     STRLEN len;
5190     SV *format_name = NULL;
5191     bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5192
5193     SSize_t off = s-SvPVX(PL_linestr);
5194     char *d;
5195
5196     s = skipspace(s); /* can move PL_linestr */
5197
5198     d = SvPVX(PL_linestr)+off;
5199
5200     SAVEBOOL(PL_parser->sig_seen);
5201     PL_parser->sig_seen = FALSE;
5202
5203     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5204         || *s == '\''
5205         || (*s == ':' && s[1] == ':'))
5206     {
5207
5208         PL_expect = XATTRBLOCK;
5209         d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5210                       &len);
5211         if (key == KEY_format)
5212             format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5213         *PL_tokenbuf = '&';
5214         if (memchr(tmpbuf, ':', len) || key != KEY_sub
5215          || pad_findmy_pvn(
5216                 PL_tokenbuf, len + 1, 0
5217             ) != NOT_IN_PAD)
5218             sv_setpvn(PL_subname, tmpbuf, len);
5219         else {
5220             sv_setsv(PL_subname,PL_curstname);
5221             sv_catpvs(PL_subname,"::");
5222             sv_catpvn(PL_subname,tmpbuf,len);
5223         }
5224         if (SvUTF8(PL_linestr))
5225             SvUTF8_on(PL_subname);
5226         have_name = TRUE;
5227
5228         s = skipspace(d);
5229     }
5230     else {
5231         if (key == KEY_my || key == KEY_our || key==KEY_state) {
5232             *d = '\0';
5233             /* diag_listed_as: Missing name in "%s sub" */
5234             Perl_croak(aTHX_
5235                       "Missing name in \"%s\"", PL_bufptr);
5236         }
5237         PL_expect = XATTRTERM;
5238         sv_setpvs(PL_subname,"?");
5239         have_name = FALSE;
5240     }
5241
5242     if (key == KEY_format) {
5243         if (format_name) {
5244             NEXTVAL_NEXTTOKE.opval
5245                 = newSVOP(OP_CONST,0, format_name);
5246             NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5247             force_next(BAREWORD);
5248         }
5249         PREBLOCK(FORMAT);
5250     }
5251
5252     /* Look for a prototype */
5253     if (*s == '(' && !is_sigsub) {
5254         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5255         if (!s)
5256             Perl_croak(aTHX_ "Prototype not terminated");
5257         COPLINE_SET_FROM_MULTI_END;
5258         (void)validate_proto(PL_subname, PL_lex_stuff,
5259                              ckWARN(WARN_ILLEGALPROTO), 0);
5260         have_proto = TRUE;
5261
5262         s = skipspace(s);
5263     }
5264     else
5265         have_proto = FALSE;
5266
5267     if (  !(*s == ':' && s[1] != ':')
5268         && (*s != '{' && *s != '(') && key != KEY_format)
5269     {
5270         assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5271                key == KEY_DESTROY || key == KEY_BEGIN ||
5272                key == KEY_UNITCHECK || key == KEY_CHECK ||
5273                key == KEY_INIT || key == KEY_END ||
5274                key == KEY_my || key == KEY_state ||
5275                key == KEY_our);
5276         if (!have_name)
5277             Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5278         else if (*s != ';' && *s != '}')
5279             Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5280     }
5281
5282     if (have_proto) {
5283         NEXTVAL_NEXTTOKE.opval =
5284             newSVOP(OP_CONST, 0, PL_lex_stuff);
5285         PL_lex_stuff = NULL;
5286         force_next(THING);
5287     }
5288     if (!have_name) {
5289         if (PL_curstash)
5290             sv_setpvs(PL_subname, "__ANON__");
5291         else
5292             sv_setpvs(PL_subname, "__ANON__::__ANON__");
5293         if (is_sigsub)
5294             TOKEN(ANON_SIGSUB);
5295         else
5296             TOKEN(ANONSUB);
5297     }
5298     force_ident_maybe_lex('&');
5299     if (is_sigsub)
5300         TOKEN(SIGSUB);
5301     else
5302         TOKEN(SUB);
5303 }
5304
5305 static int
5306 yyl_interpcasemod(pTHX_ char *s)
5307 {
5308 #ifdef DEBUGGING
5309     if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5310         Perl_croak(aTHX_
5311                    "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5312                    PL_bufptr, PL_bufend, *PL_bufptr);
5313 #endif
5314
5315     if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5316         /* if at a \E */
5317         if (PL_lex_casemods) {
5318             const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5319             PL_lex_casestack[PL_lex_casemods] = '\0';
5320
5321             if (PL_bufptr != PL_bufend
5322                 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5323                     || oldmod == 'F')) {
5324                 PL_bufptr += 2;
5325                 PL_lex_state = LEX_INTERPCONCAT;
5326             }
5327             PL_lex_allbrackets--;
5328             return REPORT(')');
5329         }
5330         else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5331            /* Got an unpaired \E */
5332            Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5333                     "Useless use of \\E");
5334         }
5335         if (PL_bufptr != PL_bufend)
5336             PL_bufptr += 2;
5337         PL_lex_state = LEX_INTERPCONCAT;
5338         return yylex();
5339     }
5340     else {
5341         DEBUG_T({
5342             PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5343         });
5344         s = PL_bufptr + 1;
5345         if (s[1] == '\\' && s[2] == 'E') {
5346             PL_bufptr = s + 3;
5347             PL_lex_state = LEX_INTERPCONCAT;
5348             return yylex();
5349         }
5350         else {
5351             I32 tmp;
5352             if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5353                 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5354             {
5355                 tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
5356             }
5357             if ((*s == 'L' || *s == 'U' || *s == 'F')
5358                 && (strpbrk(PL_lex_casestack, "LUF")))
5359             {
5360                 PL_lex_casestack[--PL_lex_casemods] = '\0';
5361                 PL_lex_allbrackets--;
5362                 return REPORT(')');
5363             }
5364             if (PL_lex_casemods > 10)
5365                 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5366             PL_lex_casestack[PL_lex_casemods++] = *s;
5367             PL_lex_casestack[PL_lex_casemods] = '\0';
5368             PL_lex_state = LEX_INTERPCONCAT;
5369             NEXTVAL_NEXTTOKE.ival = 0;
5370             force_next((2<<24)|'(');
5371             if (*s == 'l')
5372                 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5373             else if (*s == 'u')
5374                 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5375             else if (*s == 'L')
5376                 NEXTVAL_NEXTTOKE.ival = OP_LC;
5377             else if (*s == 'U')
5378                 NEXTVAL_NEXTTOKE.ival = OP_UC;
5379             else if (*s == 'Q')
5380                 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5381             else if (*s == 'F')
5382                 NEXTVAL_NEXTTOKE.ival = OP_FC;
5383             else
5384                 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5385             PL_bufptr = s + 1;
5386         }
5387         force_next(FUNC);
5388         if (PL_lex_starts) {
5389             s = PL_bufptr;
5390             PL_lex_starts = 0;
5391             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5392             if (PL_lex_casemods == 1 && PL_lex_inpat)
5393                 TOKEN(',');
5394             else
5395                 AopNOASSIGN(OP_CONCAT);
5396         }
5397         else
5398             return yylex();
5399     }
5400 }
5401
5402 static int
5403 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5404                         GV **pgv, GV ***pgvp)
5405 {
5406     GV *ogv = NULL;     /* override (winner) */
5407     GV *hgv = NULL;     /* hidden (loser) */
5408     GV *gv = *pgv;
5409
5410     if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5411         CV *cv;
5412         if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5413                                     (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5414                                     SVt_PVCV))
5415             && (cv = GvCVu(gv)))
5416         {
5417             if (GvIMPORTED_CV(gv))
5418                 ogv = gv;
5419             else if (! CvMETHOD(cv))
5420                 hgv = gv;
5421         }
5422         if (!ogv
5423             && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5424             && (gv = **pgvp)
5425             && (isGV_with_GP(gv)
5426                 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5427                 :   SvPCS_IMPORTED(gv)
5428                 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5429                                                          len, 0), 1)))
5430         {
5431             ogv = gv;
5432         }
5433     }
5434
5435     *pgv = gv;
5436
5437     if (ogv) {
5438         *orig_keyword = key;
5439         return 0;               /* overridden by import or by GLOBAL */
5440     }
5441     else if (gv && !*pgvp
5442              && -key==KEY_lock  /* XXX generalizable kludge */
5443              && GvCVu(gv))
5444     {
5445         return 0;               /* any sub overrides "weak" keyword */
5446     }
5447     else {                      /* no override */
5448         key = -key;
5449         if (key == KEY_dump) {
5450             Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5451         }
5452         *pgv = NULL;
5453         *pgvp = 0;
5454         if (hgv && key != KEY_x)        /* never ambiguous */
5455             Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5456                            "Ambiguous call resolved as CORE::%s(), "
5457                            "qualify as such or use &",
5458                            GvENAME(hgv));
5459         return key;
5460     }
5461 }
5462
5463 static int
5464 yyl_qw(pTHX_ char *s, STRLEN len)
5465 {
5466     OP *words = NULL;
5467
5468     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5469     if (!s)
5470         missingterm(NULL, 0);
5471
5472     COPLINE_SET_FROM_MULTI_END;
5473     PL_expect = XOPERATOR;
5474     if (SvCUR(PL_lex_stuff)) {
5475         int warned_comma = !ckWARN(WARN_QW);
5476         int warned_comment = warned_comma;
5477         char *d = SvPV_force(PL_lex_stuff, len);
5478         while (len) {
5479             for (; isSPACE(*d) && len; --len, ++d)
5480                 /**/;
5481             if (len) {
5482                 SV *sv;
5483                 const char *b = d;
5484                 if (!warned_comma || !warned_comment) {
5485                     for (; !isSPACE(*d) && len; --len, ++d) {
5486                         if (!warned_comma && *d == ',') {
5487                             Perl_warner(aTHX_ packWARN(WARN_QW),
5488                                 "Possible attempt to separate words with commas");
5489                             ++warned_comma;
5490                         }
5491                         else if (!warned_comment && *d == '#') {
5492                             Perl_warner(aTHX_ packWARN(WARN_QW),
5493                                 "Possible attempt to put comments in qw() list");
5494                             ++warned_comment;
5495                         }
5496                     }
5497                 }
5498                 else {
5499                     for (; !isSPACE(*d) && len; --len, ++d)
5500                         /**/;
5501                 }
5502                 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5503                 words = op_append_elem(OP_LIST, words,
5504                                        newSVOP(OP_CONST, 0, tokeq(sv)));
5505             }
5506         }
5507     }
5508     if (!words)
5509         words = newNULLLIST();
5510     SvREFCNT_dec_NN(PL_lex_stuff);
5511     PL_lex_stuff = NULL;
5512     PL_expect = XOPERATOR;
5513     pl_yylval.opval = sawparens(words);
5514     TOKEN(QWLIST);
5515 }
5516
5517 static int
5518 yyl_hyphen(pTHX_ char *s)
5519 {
5520     if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5521         I32 ftst = 0;
5522         char tmp;
5523
5524         s++;
5525         PL_bufptr = s;
5526         tmp = *s++;
5527
5528         while (s < PL_bufend && SPACE_OR_TAB(*s))
5529             s++;
5530
5531         if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5532             s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5533             DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5534             OPERATOR('-');              /* unary minus */
5535         }
5536         switch (tmp) {
5537         case 'r': ftst = OP_FTEREAD;    break;
5538         case 'w': ftst = OP_FTEWRITE;   break;
5539         case 'x': ftst = OP_FTEEXEC;    break;
5540         case 'o': ftst = OP_FTEOWNED;   break;
5541         case 'R': ftst = OP_FTRREAD;    break;
5542         case 'W': ftst = OP_FTRWRITE;   break;
5543         case 'X': ftst = OP_FTREXEC;    break;
5544         case 'O': ftst = OP_FTROWNED;   break;
5545         case 'e': ftst = OP_FTIS;       break;
5546         case 'z': ftst = OP_FTZERO;     break;
5547         case 's': ftst = OP_FTSIZE;     break;
5548         case 'f': ftst = OP_FTFILE;     break;
5549         case 'd': ftst = OP_FTDIR;      break;
5550         case 'l': ftst = OP_FTLINK;     break;
5551         case 'p': ftst = OP_FTPIPE;     break;
5552         case 'S': ftst = OP_FTSOCK;     break;
5553         case 'u': ftst = OP_FTSUID;     break;
5554         case 'g': ftst = OP_FTSGID;     break;
5555         case 'k': ftst = OP_FTSVTX;     break;
5556         case 'b': ftst = OP_FTBLK;      break;
5557         case 'c': ftst = OP_FTCHR;      break;
5558         case 't': ftst = OP_FTTTY;      break;
5559         case 'T': ftst = OP_FTTEXT;     break;
5560         case 'B': ftst = OP_FTBINARY;   break;
5561         case 'M': case 'A': case 'C':
5562             gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5563             switch (tmp) {
5564             case 'M': ftst = OP_FTMTIME; break;
5565             case 'A': ftst = OP_FTATIME; break;
5566             case 'C': ftst = OP_FTCTIME; break;
5567             default:                     break;
5568             }
5569             break;
5570         default:
5571             break;
5572         }
5573         if (ftst) {
5574             PL_last_uni = PL_oldbufptr;
5575             PL_last_lop_op = (OPCODE)ftst;
5576             DEBUG_T( {
5577                 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5578             } );
5579             FTST(ftst);
5580         }
5581         else {
5582             /* Assume it was a minus followed by a one-letter named
5583              * subroutine call (or a -bareword), then. */
5584             DEBUG_T( {
5585                 PerlIO_printf(Perl_debug_log,
5586                     "### '-%c' looked like a file test but was not\n",
5587                     (int) tmp);
5588             } );
5589             s = --PL_bufptr;
5590         }
5591     }
5592     {
5593         const char tmp = *s++;
5594         if (*s == tmp) {
5595             s++;
5596             if (PL_expect == XOPERATOR)
5597                 TERM(POSTDEC);
5598             else
5599                 OPERATOR(PREDEC);
5600         }
5601         else if (*s == '>') {
5602             s++;
5603             s = skipspace(s);
5604             if (((*s == '$' || *s == '&') && s[1] == '*')
5605               ||(*s == '$' && s[1] == '#' && s[2] == '*')
5606               ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5607               ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5608              )
5609             {
5610                 PL_expect = XPOSTDEREF;
5611                 TOKEN(ARROW);
5612             }
5613             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5614                 s = force_word(s,METHOD,FALSE,TRUE);
5615                 TOKEN(ARROW);
5616             }
5617             else if (*s == '$')
5618                 OPERATOR(ARROW);
5619             else
5620                 TERM(ARROW);
5621         }
5622         if (PL_expect == XOPERATOR) {
5623             if (*s == '='
5624                 && !PL_lex_allbrackets
5625                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5626             {
5627                 s--;
5628                 TOKEN(0);
5629             }
5630             Aop(OP_SUBTRACT);
5631         }
5632         else {
5633             if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5634                 check_uni();
5635             OPERATOR('-');              /* unary minus */
5636         }
5637     }
5638 }
5639
5640 static int
5641 yyl_plus(pTHX_ char *s)
5642 {
5643     const char tmp = *s++;
5644     if (*s == tmp) {
5645         s++;
5646         if (PL_expect == XOPERATOR)
5647             TERM(POSTINC);
5648         else
5649             OPERATOR(PREINC);
5650     }
5651     if (PL_expect == XOPERATOR) {
5652         if (*s == '='
5653             && !PL_lex_allbrackets
5654             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5655         {
5656             s--;
5657             TOKEN(0);
5658         }
5659         Aop(OP_ADD);
5660     }
5661     else {
5662         if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5663             check_uni();
5664         OPERATOR('+');
5665     }
5666 }
5667
5668 static int
5669 yyl_star(pTHX_ char *s)
5670 {
5671     if (PL_expect == XPOSTDEREF)
5672         POSTDEREF('*');
5673
5674     if (PL_expect != XOPERATOR) {
5675         s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5676         PL_expect = XOPERATOR;
5677         force_ident(PL_tokenbuf, '*');
5678         if (!*PL_tokenbuf)
5679             PREREF('*');
5680         TERM('*');
5681     }
5682
5683     s++;
5684     if (*s == '*') {
5685         s++;
5686         if (*s == '=' && !PL_lex_allbrackets
5687             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5688         {
5689             s -= 2;
5690             TOKEN(0);
5691         }
5692         PWop(OP_POW);
5693     }
5694
5695     if (*s == '='
5696         && !PL_lex_allbrackets
5697         && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5698     {
5699         s--;
5700         TOKEN(0);
5701     }
5702
5703     Mop(OP_MULTIPLY);
5704 }
5705
5706 static int
5707 yyl_percent(pTHX_ char *s)
5708 {
5709     if (PL_expect == XOPERATOR) {
5710         if (s[1] == '='
5711             && !PL_lex_allbrackets
5712             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5713         {
5714             TOKEN(0);
5715         }
5716         ++s;
5717         Mop(OP_MODULO);
5718     }
5719     else if (PL_expect == XPOSTDEREF)
5720         POSTDEREF('%');
5721
5722     PL_tokenbuf[0] = '%';
5723     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5724     pl_yylval.ival = 0;
5725     if (!PL_tokenbuf[1]) {
5726         PREREF('%');
5727     }
5728     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5729         && intuit_more(s, PL_bufend)) {
5730         if (*s == '[')
5731             PL_tokenbuf[0] = '@';
5732     }
5733     PL_expect = XOPERATOR;
5734     force_ident_maybe_lex('%');
5735     TERM('%');
5736 }
5737
5738 static int
5739 yyl_caret(pTHX_ char *s)
5740 {
5741     char *d = s;
5742     const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5743     if (bof && s[1] == '.')
5744         s++;
5745     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5746             (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5747     {
5748         s = d;
5749         TOKEN(0);
5750     }
5751     s++;
5752     BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5753 }
5754
5755 static int
5756 yyl_colon(pTHX_ char *s)
5757 {
5758     OP *attrs;
5759
5760     switch (PL_expect) {
5761     case XOPERATOR:
5762         if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5763             break;
5764         PL_bufptr = s;  /* update in case we back off */
5765         if (*s == '=') {
5766             Perl_croak(aTHX_
5767                        "Use of := for an empty attribute list is not allowed");
5768         }
5769         goto grabattrs;
5770     case XATTRBLOCK:
5771         PL_expect = XBLOCK;
5772         goto grabattrs;
5773     case XATTRTERM:
5774         PL_expect = XTERMBLOCK;
5775      grabattrs:
5776         /* NB: as well as parsing normal attributes, we also end up
5777          * here if there is something looking like attributes
5778          * following a signature (which is illegal, but used to be
5779          * legal in 5.20..5.26). If the latter, we still parse the
5780          * attributes so that error messages(s) are less confusing,
5781          * but ignore them (parser->sig_seen).
5782          */
5783         s = skipspace(s);
5784         attrs = NULL;
5785         while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5786             bool sig = PL_parser->sig_seen;
5787             I32 tmp;
5788             SV *sv;
5789             STRLEN len;
5790             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5791             if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5792                 if (tmp < 0) tmp = -tmp;
5793                 switch (tmp) {
5794                 case KEY_or:
5795                 case KEY_and:
5796                 case KEY_for:
5797                 case KEY_foreach:
5798                 case KEY_unless:
5799                 case KEY_if:
5800                 case KEY_while:
5801                 case KEY_until:
5802                     goto got_attrs;
5803                 default:
5804                     break;
5805                 }
5806             }
5807             sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5808             if (*d == '(') {
5809                 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5810                 if (!d) {
5811                     if (attrs)
5812                         op_free(attrs);
5813                     sv_free(sv);
5814                     Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5815                 }
5816                 COPLINE_SET_FROM_MULTI_END;
5817             }
5818             if (PL_lex_stuff) {
5819                 sv_catsv(sv, PL_lex_stuff);
5820                 attrs = op_append_elem(OP_LIST, attrs,
5821                                     newSVOP(OP_CONST, 0, sv));
5822                 SvREFCNT_dec_NN(PL_lex_stuff);
5823                 PL_lex_stuff = NULL;
5824             }
5825             else {
5826                 /* NOTE: any CV attrs applied here need to be part of
5827                    the CVf_BUILTIN_ATTRS define in cv.h! */
5828                 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5829                     sv_free(sv);
5830                     if (!sig)
5831                         CvLVALUE_on(PL_compcv);
5832                 }
5833                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5834                     sv_free(sv);
5835                     if (!sig)
5836                         CvMETHOD_on(PL_compcv);
5837                 }
5838                 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5839                     sv_free(sv);
5840                     if (!sig) {
5841                         Perl_ck_warner_d(aTHX_
5842                             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5843                            ":const is experimental"
5844                         );
5845                         CvANONCONST_on(PL_compcv);
5846                         if (!CvANON(PL_compcv))
5847                             yyerror(":const is not permitted on named "
5848                                     "subroutines");
5849                     }
5850                 }
5851                 /* After we've set the flags, it could be argued that
5852                    we don't need to do the attributes.pm-based setting
5853                    process, and shouldn't bother appending recognized
5854                    flags.  To experiment with that, uncomment the
5855                    following "else".  (Note that's already been
5856                    uncommented.  That keeps the above-applied built-in
5857                    attributes from being intercepted (and possibly
5858                    rejected) by a package's attribute routines, but is
5859                    justified by the performance win for the common case
5860                    of applying only built-in attributes.) */
5861                 else
5862                     attrs = op_append_elem(OP_LIST, attrs,
5863                                         newSVOP(OP_CONST, 0,
5864                                                 sv));
5865             }
5866             s = skipspace(d);
5867             if (*s == ':' && s[1] != ':')
5868                 s = skipspace(s+1);
5869             else if (s == d)
5870                 break;  /* require real whitespace or :'s */
5871             /* XXX losing whitespace on sequential attributes here */
5872         }
5873
5874         if (*s != ';'
5875             && *s != '}'
5876             && !(PL_expect == XOPERATOR
5877                  ? (*s == '=' ||  *s == ')')
5878                  : (*s == '{' ||  *s == '(')))
5879         {
5880             const char q = ((*s == '\'') ? '"' : '\'');
5881             /* If here for an expression, and parsed no attrs, back off. */
5882             if (PL_expect == XOPERATOR && !attrs) {
5883                 s = PL_bufptr;
5884                 break;
5885             }
5886             /* MUST advance bufptr here to avoid bogus "at end of line"
5887                context messages from yyerror().
5888             */
5889             PL_bufptr = s;
5890             yyerror( (const char *)
5891                      (*s
5892                       ? Perl_form(aTHX_ "Invalid separator character "
5893                                   "%c%c%c in attribute list", q, *s, q)
5894                       : "Unterminated attribute list" ) );
5895             if (attrs)
5896                 op_free(attrs);
5897             OPERATOR(':');
5898         }
5899
5900     got_attrs:
5901         if (PL_parser->sig_seen) {
5902             /* see comment about about sig_seen and parser error
5903              * handling */
5904             if (attrs)
5905                 op_free(attrs);
5906             Perl_croak(aTHX_ "Subroutine attributes must come "
5907                              "before the signature");
5908         }
5909         if (attrs) {
5910             NEXTVAL_NEXTTOKE.opval = attrs;
5911             force_next(THING);
5912         }
5913         TOKEN(COLONATTR);
5914     }
5915
5916     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5917         s--;
5918         TOKEN(0);
5919     }
5920
5921     PL_lex_allbrackets--;
5922     OPERATOR(':');
5923 }
5924
5925 static int
5926 yyl_subproto(pTHX_ char *s, CV *cv)
5927 {
5928     STRLEN protolen = CvPROTOLEN(cv);
5929     const char *proto = CvPROTO(cv);
5930     bool optional;
5931
5932     proto = S_strip_spaces(aTHX_ proto, &protolen);
5933     if (!protolen)
5934         TERM(FUNC0SUB);
5935     if ((optional = *proto == ';')) {
5936         do {
5937             proto++;
5938         } while (*proto == ';');
5939     }
5940
5941     if (
5942         (
5943             (
5944                 *proto == '$' || *proto == '_'
5945              || *proto == '*' || *proto == '+'
5946             )
5947          && proto[1] == '\0'
5948         )
5949      || (
5950          *proto == '\\' && proto[1] && proto[2] == '\0'
5951         )
5952     ) {
5953         UNIPROTO(UNIOPSUB,optional);
5954     }
5955
5956     if (*proto == '\\' && proto[1] == '[') {
5957         const char *p = proto + 2;
5958         while(*p && *p != ']')
5959             ++p;
5960         if(*p == ']' && !p[1])
5961             UNIPROTO(UNIOPSUB,optional);
5962     }
5963
5964     if (*proto == '&' && *s == '{') {
5965         if (PL_curstash)
5966             sv_setpvs(PL_subname, "__ANON__");
5967         else
5968             sv_setpvs(PL_subname, "__ANON__::__ANON__");
5969         if (!PL_lex_allbrackets
5970             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
5971         {
5972             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5973         }
5974         PREBLOCK(LSTOPSUB);
5975     }
5976
5977     return KEY_NULL;
5978 }
5979
5980 static int
5981 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
5982 {
5983     char *d;
5984     if (PL_lex_brackets > 100) {
5985         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5986     }
5987
5988     switch (PL_expect) {
5989     case XTERM:
5990     case XTERMORDORDOR:
5991         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5992         PL_lex_allbrackets++;
5993         OPERATOR(HASHBRACK);
5994     case XOPERATOR:
5995         while (s < PL_bufend && SPACE_OR_TAB(*s))
5996             s++;
5997         d = s;
5998         PL_tokenbuf[0] = '\0';
5999         if (d < PL_bufend && *d == '-') {
6000             PL_tokenbuf[0] = '-';
6001             d++;
6002             while (d < PL_bufend && SPACE_OR_TAB(*d))
6003                 d++;
6004         }
6005         if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6006             STRLEN len;
6007             d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6008                           FALSE, &len);
6009             while (d < PL_bufend && SPACE_OR_TAB(*d))
6010                 d++;
6011             if (*d == '}') {
6012                 const char minus = (PL_tokenbuf[0] == '-');
6013                 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6014                 if (minus)
6015                     force_next('-');
6016             }
6017         }
6018         /* FALLTHROUGH */
6019     case XATTRTERM:
6020     case XTERMBLOCK:
6021         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6022         PL_lex_allbrackets++;
6023         PL_expect = XSTATE;
6024         break;
6025     case XATTRBLOCK:
6026     case XBLOCK:
6027         PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6028         PL_lex_allbrackets++;
6029         PL_expect = XSTATE;
6030         break;
6031     case XBLOCKTERM:
6032         PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6033         PL_lex_allbrackets++;
6034         PL_expect = XSTATE;
6035         break;
6036     default: {
6037             const char *t;
6038             if (PL_oldoldbufptr == PL_last_lop)
6039                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6040             else
6041                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6042             PL_lex_allbrackets++;
6043             s = skipspace(s);
6044             if (*s == '}') {
6045                 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6046                     PL_expect = XTERM;
6047                     /* This hack is to get the ${} in the message. */
6048                     PL_bufptr = s+1;
6049                     yyerror("syntax error");
6050                     break;
6051                 }
6052                 OPERATOR(HASHBRACK);
6053             }
6054             if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6055                 /* ${...} or @{...} etc., but not print {...}
6056                  * Skip the disambiguation and treat this as a block.
6057                  */
6058                 goto block_expectation;
6059             }
6060             /* This hack serves to disambiguate a pair of curlies
6061              * as being a block or an anon hash.  Normally, expectation
6062              * determines that, but in cases where we're not in a
6063              * position to expect anything in particular (like inside
6064              * eval"") we have to resolve the ambiguity.  This code
6065              * covers the case where the first term in the curlies is a
6066              * quoted string.  Most other cases need to be explicitly
6067              * disambiguated by prepending a "+" before the opening
6068              * curly in order to force resolution as an anon hash.
6069              *
6070              * XXX should probably propagate the outer expectation
6071              * into eval"" to rely less on this hack, but that could
6072              * potentially break current behavior of eval"".
6073              * GSAR 97-07-21
6074              */
6075             t = s;
6076             if (*s == '\'' || *s == '"' || *s == '`') {
6077                 /* common case: get past first string, handling escapes */
6078                 for (t++; t < PL_bufend && *t != *s;)
6079                     if (*t++ == '\\')
6080                         t++;
6081                 t++;
6082             }
6083             else if (*s == 'q') {
6084                 if (++t < PL_bufend
6085                     && (!isWORDCHAR(*t)
6086                         || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6087                             && !isWORDCHAR(*t))))
6088                 {
6089                     /* skip q//-like construct */
6090                     const char *tmps;
6091                     char open, close, term;
6092                     I32 brackets = 1;
6093
6094                     while (t < PL_bufend && isSPACE(*t))
6095                         t++;
6096                     /* check for q => */
6097                     if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6098                         OPERATOR(HASHBRACK);
6099                     }
6100                     term = *t;
6101                     open = term;
6102                     if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6103                         term = tmps[5];
6104                     close = term;
6105                     if (open == close)
6106                         for (t++; t < PL_bufend; t++) {
6107                             if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6108                                 t++;
6109                             else if (*t == open)
6110                                 break;
6111                         }
6112                     else {
6113                         for (t++; t < PL_bufend; t++) {
6114                             if (*t == '\\' && t+1 < PL_bufend)
6115                                 t++;
6116                             else if (*t == close && --brackets <= 0)
6117                                 break;
6118                             else if (*t == open)
6119                                 brackets++;
6120                         }
6121                     }
6122                     t++;
6123                 }
6124                 else
6125                     /* skip plain q word */
6126                     while (   t < PL_bufend
6127                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6128                     {
6129                         t += UTF ? UTF8SKIP(t) : 1;
6130                     }
6131             }
6132             else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6133                 t += UTF ? UTF8SKIP(t) : 1;
6134                 while (   t < PL_bufend
6135                        && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6136                 {
6137                     t += UTF ? UTF8SKIP(t) : 1;
6138                 }
6139             }
6140             while (t < PL_bufend && isSPACE(*t))
6141                 t++;
6142             /* if comma follows first term, call it an anon hash */
6143             /* XXX it could be a comma expression with loop modifiers */
6144             if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6145                                || (*t == '=' && t[1] == '>')))
6146                 OPERATOR(HASHBRACK);
6147             if (PL_expect == XREF) {
6148               block_expectation:
6149                 /* If there is an opening brace or 'sub:', treat it
6150                    as a term to make ${{...}}{k} and &{sub:attr...}
6151                    dwim.  Otherwise, treat it as a statement, so
6152                    map {no strict; ...} works.
6153                  */
6154                 s = skipspace(s);
6155                 if (*s == '{') {
6156                     PL_expect = XTERM;
6157                     break;
6158                 }
6159                 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6160                     PL_bufptr = s;
6161                     d = s + 3;
6162                     d = skipspace(d);
6163                     s = PL_bufptr;
6164                     if (*d == ':') {
6165                         PL_expect = XTERM;
6166                         break;
6167                     }
6168                 }
6169                 PL_expect = XSTATE;
6170             }
6171             else {
6172                 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6173                 PL_expect = XSTATE;
6174             }
6175         }
6176         break;
6177     }
6178
6179     pl_yylval.ival = CopLINE(PL_curcop);
6180     PL_copline = NOLINE;   /* invalidate current command line number */
6181     TOKEN(formbrack ? '=' : '{');
6182 }
6183
6184 static int
6185 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6186 {
6187     assert(s != PL_bufend);
6188     s++;
6189
6190     if (PL_lex_brackets <= 0)
6191         /* diag_listed_as: Unmatched right %s bracket */
6192         yyerror("Unmatched right curly bracket");
6193     else
6194         PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6195
6196     PL_lex_allbrackets--;
6197
6198     if (PL_lex_state == LEX_INTERPNORMAL) {
6199         if (PL_lex_brackets == 0) {
6200             if (PL_expect & XFAKEBRACK) {
6201                 PL_expect &= XENUMMASK;
6202                 PL_lex_state = LEX_INTERPEND;
6203                 PL_bufptr = s;
6204                 return yylex(); /* ignore fake brackets */
6205             }
6206             if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6207              && SvEVALED(PL_lex_repl))
6208                 PL_lex_state = LEX_INTERPEND;
6209             else if (*s == '-' && s[1] == '>')
6210                 PL_lex_state = LEX_INTERPENDMAYBE;
6211             else if (*s != '[' && *s != '{')
6212                 PL_lex_state = LEX_INTERPEND;
6213         }
6214     }
6215
6216     if (PL_expect & XFAKEBRACK) {
6217         PL_expect &= XENUMMASK;
6218         PL_bufptr = s;
6219         return yylex();         /* ignore fake brackets */
6220     }
6221
6222     force_next(formbrack ? '.' : '}');
6223     if (formbrack) LEAVE_with_name("lex_format");
6224     if (formbrack == 2) { /* means . where arguments were expected */
6225         force_next(';');
6226         TOKEN(FORMRBRACK);
6227     }
6228
6229     TOKEN(';');
6230 }
6231
6232 static int
6233 yyl_ampersand(pTHX_ char *s)
6234 {
6235     if (PL_expect == XPOSTDEREF)
6236         POSTDEREF('&');
6237
6238     s++;
6239     if (*s++ == '&') {
6240         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6241                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6242             s -= 2;
6243             TOKEN(0);
6244         }
6245         AOPERATOR(ANDAND);
6246     }
6247     s--;
6248
6249     if (PL_expect == XOPERATOR) {
6250         char *d;
6251         bool bof;
6252         if (   PL_bufptr == PL_linestart
6253             && ckWARN(WARN_SEMICOLON)
6254             && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6255         {
6256             CopLINE_dec(PL_curcop);
6257             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6258             CopLINE_inc(PL_curcop);
6259         }
6260         d = s;
6261         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6262             s++;
6263         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6264                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6265             s = d;
6266             s--;
6267             TOKEN(0);
6268         }
6269         if (d == s)
6270             BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6271         else
6272             BAop(OP_SBIT_AND);
6273     }
6274
6275     PL_tokenbuf[0] = '&';
6276     s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6277     pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6278
6279     if (PL_tokenbuf[1])
6280         force_ident_maybe_lex('&');
6281     else
6282         PREREF('&');
6283
6284     TERM('&');
6285 }
6286
6287 static int
6288 yyl_verticalbar(pTHX_ char *s)
6289 {
6290     char *d;
6291     bool bof;
6292
6293     s++;
6294     if (*s++ == '|') {
6295         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6296                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6297             s -= 2;
6298             TOKEN(0);
6299         }
6300         AOPERATOR(OROR);
6301     }
6302
6303     s--;
6304     d = s;
6305     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6306         s++;
6307
6308     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6309             (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6310         s = d - 1;
6311         TOKEN(0);
6312     }
6313
6314     BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6315 }
6316
6317 static int
6318 yyl_bang(pTHX_ char *s)
6319 {
6320     const char tmp = *s++;
6321     if (tmp == '=') {
6322         /* was this !=~ where !~ was meant?
6323          * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6324
6325         if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6326             const char *t = s+1;
6327
6328             while (t < PL_bufend && isSPACE(*t))
6329                 ++t;
6330
6331             if (*t == '/' || *t == '?'
6332                 || ((*t == 'm' || *t == 's' || *t == 'y')
6333                     && !isWORDCHAR(t[1]))
6334                 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6335                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6336                             "!=~ should be !~");
6337         }
6338
6339         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6340             s -= 2;
6341             TOKEN(0);
6342         }
6343
6344         ChEop(OP_NE);
6345     }
6346
6347     if (tmp == '~')
6348         PMop(OP_NOT);
6349
6350     s--;
6351     OPERATOR('!');
6352 }
6353
6354 static int
6355 yyl_snail(pTHX_ char *s)
6356 {
6357     if (PL_expect == XPOSTDEREF)
6358         POSTDEREF('@');
6359     PL_tokenbuf[0] = '@';
6360     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6361     if (PL_expect == XOPERATOR) {
6362         char *d = s;
6363         if (PL_bufptr > s) {
6364             d = PL_bufptr-1;
6365             PL_bufptr = PL_oldbufptr;
6366         }
6367         no_op("Array", d);
6368     }
6369     pl_yylval.ival = 0;
6370     if (!PL_tokenbuf[1]) {
6371         PREREF('@');
6372     }
6373     if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6374         s = skipspace(s);
6375     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6376         && intuit_more(s, PL_bufend))
6377     {
6378         if (*s == '{')
6379             PL_tokenbuf[0] = '%';
6380
6381         /* Warn about @ where they meant $. */
6382         if (*s == '[' || *s == '{') {
6383             if (ckWARN(WARN_SYNTAX)) {
6384                 S_check_scalar_slice(aTHX_ s);
6385             }
6386         }
6387     }
6388     PL_expect = XOPERATOR;
6389     force_ident_maybe_lex('@');
6390     TERM('@');
6391 }
6392
6393 static int
6394 yyl_slash(pTHX_ char *s)
6395 {
6396     if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6397         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6398                 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6399             TOKEN(0);
6400         s += 2;
6401         AOPERATOR(DORDOR);
6402     }
6403     else if (PL_expect == XOPERATOR) {
6404         s++;
6405         if (*s == '=' && !PL_lex_allbrackets
6406             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6407         {
6408             s--;
6409             TOKEN(0);
6410         }
6411         Mop(OP_DIVIDE);
6412     }
6413     else {
6414         /* Disable warning on "study /blah/" */
6415         if (    PL_oldoldbufptr == PL_last_uni
6416             && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6417                 || memNE(PL_last_uni, "study", 5)
6418                 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6419          ))
6420             check_uni();
6421         s = scan_pat(s,OP_MATCH);
6422         TERM(sublex_start());
6423     }
6424 }
6425
6426 static int
6427 yyl_leftsquare(pTHX_ char *s)
6428 {
6429     char tmp;
6430
6431     if (PL_lex_brackets > 100)
6432         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6433     PL_lex_brackstack[PL_lex_brackets++] = 0;
6434     PL_lex_allbrackets++;
6435     tmp = *s++;
6436     OPERATOR(tmp);
6437 }
6438
6439 static int
6440 yyl_rightsquare(pTHX_ char *s)
6441 {
6442     if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6443         TOKEN(0);
6444     s++;
6445     if (PL_lex_brackets <= 0)
6446         /* diag_listed_as: Unmatched right %s bracket */
6447         yyerror("Unmatched right square bracket");
6448     else
6449         --PL_lex_brackets;
6450     PL_lex_allbrackets--;
6451     if (PL_lex_state == LEX_INTERPNORMAL) {
6452         if (PL_lex_brackets == 0) {
6453             if (*s == '-' && s[1] == '>')
6454                 PL_lex_state = LEX_INTERPENDMAYBE;
6455             else if (*s != '[' && *s != '{')
6456                 PL_lex_state = LEX_INTERPEND;
6457         }
6458     }
6459     TERM(']');
6460 }
6461
6462 static int
6463 yyl_tilde(pTHX_ char *s)
6464 {
6465     bool bof;
6466     if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6467         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6468             TOKEN(0);
6469         s += 2;
6470         Perl_ck_warner_d(aTHX_
6471             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6472             "Smartmatch is experimental");
6473         NCEop(OP_SMARTMATCH);
6474     }
6475     s++;
6476     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6477         s++;
6478         BCop(OP_SCOMPLEMENT);
6479     }
6480     BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6481 }
6482
6483 static int
6484 yyl_leftparen(pTHX_ char *s)
6485 {
6486     if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6487         PL_oldbufptr = PL_oldoldbufptr;         /* allow print(STDOUT 123) */
6488     else
6489         PL_expect = XTERM;
6490     s = skipspace(s);
6491     PL_lex_allbrackets++;
6492     TOKEN('(');
6493 }
6494
6495 static int
6496 yyl_rightparen(pTHX_ char *s)
6497 {
6498     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6499         TOKEN(0);
6500     s++;
6501     PL_lex_allbrackets--;
6502     s = skipspace(s);
6503     if (*s == '{')
6504         PREBLOCK(')');
6505     TERM(')');
6506 }
6507
6508 static int
6509 yyl_leftpointy(pTHX_ char *s)
6510 {
6511     char tmp;
6512
6513     if (PL_expect != XOPERATOR) {
6514         if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6515             check_uni();
6516         if (s[1] == '<' && s[2] != '>')
6517             s = scan_heredoc(s);
6518         else
6519             s = scan_inputsymbol(s);
6520         PL_expect = XOPERATOR;
6521         TOKEN(sublex_start());
6522     }
6523
6524     s++;
6525
6526     tmp = *s++;
6527     if (tmp == '<') {
6528         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6529             s -= 2;
6530             TOKEN(0);
6531         }
6532         SHop(OP_LEFT_SHIFT);
6533     }
6534     if (tmp == '=') {
6535         tmp = *s++;
6536         if (tmp == '>') {
6537             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6538                 s -= 3;
6539                 TOKEN(0);
6540             }
6541             NCEop(OP_NCMP);
6542         }
6543         s--;
6544         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6545             s -= 2;
6546             TOKEN(0);
6547         }
6548         ChRop(OP_LE);
6549     }
6550
6551     s--;
6552     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6553         s--;
6554         TOKEN(0);
6555     }
6556
6557     ChRop(OP_LT);
6558 }
6559
6560 static int
6561 yyl_rightpointy(pTHX_ char *s)
6562 {
6563     const char tmp = *s++;
6564
6565     if (tmp == '>') {
6566         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6567             s -= 2;
6568             TOKEN(0);
6569         }
6570         SHop(OP_RIGHT_SHIFT);
6571     }
6572     else if (tmp == '=') {
6573         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6574             s -= 2;
6575             TOKEN(0);
6576         }
6577         ChRop(OP_GE);
6578     }
6579
6580     s--;
6581     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6582         s--;
6583         TOKEN(0);
6584     }
6585
6586     ChRop(OP_GT);
6587 }
6588
6589 static int
6590 yyl_sglquote(pTHX_ char *s)
6591 {
6592     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6593     if (!s)
6594         missingterm(NULL, 0);
6595     COPLINE_SET_FROM_MULTI_END;
6596     DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6597     if (PL_expect == XOPERATOR) {
6598         no_op("String",s);
6599     }
6600     pl_yylval.ival = OP_CONST;
6601     TERM(sublex_start());
6602 }
6603
6604 static int
6605 yyl_dblquote(pTHX_ char *s)
6606 {
6607     char *d;
6608     STRLEN len;
6609     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6610     DEBUG_T( {
6611         if (s)
6612             printbuf("### Saw string before %s\n", s);
6613         else
6614             PerlIO_printf(Perl_debug_log,
6615                          "### Saw unterminated string\n");
6616     } );
6617     if (PL_expect == XOPERATOR) {
6618             no_op("String",s);
6619     }
6620     if (!s)
6621         missingterm(NULL, 0);
6622     pl_yylval.ival = OP_CONST;
6623     /* FIXME. I think that this can be const if char *d is replaced by
6624        more localised variables.  */
6625     for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6626         if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6627             pl_yylval.ival = OP_STRINGIFY;
6628             break;
6629         }
6630     }
6631     if (pl_yylval.ival == OP_CONST)
6632         COPLINE_SET_FROM_MULTI_END;
6633     TERM(sublex_start());
6634 }
6635
6636 static int
6637 yyl_backtick(pTHX_ char *s)
6638 {
6639     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6640     DEBUG_T( {
6641         if (s)
6642             printbuf("### Saw backtick string before %s\n", s);
6643         else
6644             PerlIO_printf(Perl_debug_log,
6645                          "### Saw unterminated backtick string\n");
6646     } );
6647     if (PL_expect == XOPERATOR)
6648         no_op("Backticks",s);
6649     if (!s)
6650         missingterm(NULL, 0);
6651     pl_yylval.ival = OP_BACKTICK;
6652     TERM(sublex_start());
6653 }
6654
6655 static int
6656 yyl_backslash(pTHX_ char *s)
6657 {
6658     if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6659         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6660                        *s, *s);
6661     if (PL_expect == XOPERATOR)
6662         no_op("Backslash",s);
6663     OPERATOR(REFGEN);
6664 }
6665
6666 static void
6667 yyl_data_handle(pTHX)
6668 {
6669     HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6670                             ? PL_curstash
6671                             : PL_defstash;
6672     GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6673
6674     if (!isGV(gv))
6675         gv_init(gv,stash,"DATA",4,0);
6676
6677     GvMULTI_on(gv);
6678     if (!GvIO(gv))
6679         GvIOp(gv) = newIO();
6680     IoIFP(GvIOp(gv)) = PL_rsfp;
6681
6682     /* Mark this internal pseudo-handle as clean */
6683     IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6684     if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6685         IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6686     else
6687         IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6688
6689 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6690     /* if the script was opened in binmode, we need to revert
6691      * it to text mode for compatibility; but only iff it has CRs
6692      * XXX this is a questionable hack at best. */
6693     if (PL_bufend-PL_bufptr > 2
6694         && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6695     {
6696         Off_t loc = 0;
6697         if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6698             loc = PerlIO_tell(PL_rsfp);
6699             (void)PerlIO_seek(PL_rsfp, 0L, 0);
6700         }
6701         if (PerlLIO_setmode(RSFP_FILENO, O_TEXT) != -1) {
6702             if (loc > 0)
6703                 PerlIO_seek(PL_rsfp, loc, 0);
6704         }
6705     }
6706 #endif
6707
6708 #ifdef PERLIO_LAYERS
6709     if (!IN_BYTES) {
6710         if (UTF)
6711             PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6712     }
6713 #endif
6714
6715     PL_rsfp = NULL;
6716 }
6717
6718 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6719     __attribute__noreturn__;
6720
6721 PERL_STATIC_NO_RET void
6722 yyl_croak_unrecognised(pTHX_ char *s)
6723 {
6724     SV *dsv = newSVpvs_flags("", SVs_TEMP);
6725     const char *c;
6726     char *d;
6727     STRLEN len;
6728
6729     if (UTF) {
6730         STRLEN skiplen = UTF8SKIP(s);
6731         STRLEN stravail = PL_bufend - s;
6732         c = sv_uni_display(dsv, newSVpvn_flags(s,
6733                                                skiplen > stravail ? stravail : skiplen,
6734                                                SVs_TEMP | SVf_UTF8),
6735                            10, UNI_DISPLAY_ISPRINT);
6736     }
6737     else {
6738         c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6739     }
6740
6741     if (s >= PL_linestart) {
6742         d = PL_linestart;
6743     }
6744     else {
6745         /* somehow (probably due to a parse failure), PL_linestart has advanced
6746          * pass PL_bufptr, get a reasonable beginning of line
6747          */
6748         d = s;
6749         while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6750             --d;
6751     }
6752     len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6753     if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6754         d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6755     }
6756
6757     Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6758                       UTF8fARG(UTF, (s - d), d),
6759                      (int) len + 1);
6760 }
6761
6762 static int
6763 yyl_require(pTHX_ char *s, I32 orig_keyword)
6764 {
6765     s = skipspace(s);
6766     if (isDIGIT(*s)) {
6767         s = force_version(s, FALSE);
6768     }
6769     else if (*s != 'v' || !isDIGIT(s[1])
6770             || (s = force_version(s, TRUE), *s == 'v'))
6771     {
6772         *PL_tokenbuf = '\0';
6773         s = force_word(s,BAREWORD,TRUE,TRUE);
6774         if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6775                                    PL_tokenbuf + sizeof(PL_tokenbuf),
6776                                    UTF))
6777         {
6778             gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6779                         GV_ADD | (UTF ? SVf_UTF8 : 0));
6780         }
6781         else if (*s == '<')
6782             yyerror("<> at require-statement should be quotes");
6783     }
6784
6785     if (orig_keyword == KEY_require)
6786         pl_yylval.ival = 1;
6787     else
6788         pl_yylval.ival = 0;
6789
6790     PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6791     PL_bufptr = s;
6792     PL_last_uni = PL_oldbufptr;
6793     PL_last_lop_op = OP_REQUIRE;
6794     s = skipspace(s);
6795     return REPORT( (int)REQUIRE );
6796 }
6797
6798 static int
6799 yyl_foreach(pTHX_ char *s)
6800 {
6801     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6802         return REPORT(0);
6803     pl_yylval.ival = CopLINE(PL_curcop);
6804     s = skipspace(s);
6805     if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6806         char *p = s;
6807         SSize_t s_off = s - SvPVX(PL_linestr);
6808         STRLEN len;
6809
6810         if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") && isSPACE(p[2])) {
6811             p += 2;
6812         }
6813         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") && isSPACE(p[3])) {
6814             p += 3;
6815         }
6816
6817         p = skipspace(p);
6818         /* skip optional package name, as in "for my abc $x (..)" */
6819         if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
6820             p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6821             p = skipspace(p);
6822         }
6823         if (*p != '$' && *p != '\\')
6824             Perl_croak(aTHX_ "Missing $ on loop variable");
6825
6826         /* The buffer may have been reallocated, update s */
6827         s = SvPVX(PL_linestr) + s_off;
6828     }
6829     OPERATOR(FOR);
6830 }
6831
6832 static int
6833 yyl_do(pTHX_ char *s, I32 orig_keyword)
6834 {
6835     s = skipspace(s);
6836     if (*s == '{')
6837         PRETERMBLOCK(DO);
6838     if (*s != '\'') {
6839         char *d;
6840         STRLEN len;
6841         *PL_tokenbuf = '&';
6842         d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6843                       1, &len);
6844         if (len && memNEs(PL_tokenbuf+1, len, "CORE")
6845          && !keyword(PL_tokenbuf + 1, len, 0)) {
6846             SSize_t off = s-SvPVX(PL_linestr);
6847             d = skipspace(d);
6848             s = SvPVX(PL_linestr)+off;
6849             if (*d == '(') {
6850                 force_ident_maybe_lex('&');
6851                 s = d;
6852             }
6853         }
6854     }
6855     if (orig_keyword == KEY_do)
6856         pl_yylval.ival = 1;
6857     else
6858         pl_yylval.ival = 0;
6859     OPERATOR(DO);
6860 }
6861
6862 static int
6863 yyl_my(pTHX_ char *s, I32 my)
6864 {
6865     if (PL_in_my) {
6866         PL_bufptr = s;
6867         yyerror(Perl_form(aTHX_
6868                           "Can't redeclare \"%s\" in \"%s\"",
6869                            my       == KEY_my    ? "my" :
6870                            my       == KEY_state ? "state" : "our",
6871                            PL_in_my == KEY_my    ? "my" :
6872                            PL_in_my == KEY_state ? "state" : "our"));
6873     }
6874     PL_in_my = (U16)my;
6875     s = skipspace(s);
6876     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6877         STRLEN len;
6878         s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6879         if (memEQs(PL_tokenbuf, len, "sub"))
6880             return yyl_sub(aTHX_ s, my);
6881         PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6882         if (!PL_in_my_stash) {
6883             char tmpbuf[1024];
6884             int i;
6885             PL_bufptr = s;
6886             i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6887             PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
6888             yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
6889         }
6890     }
6891     else if (*s == '\\') {
6892         if (!FEATURE_MYREF_IS_ENABLED)
6893             Perl_croak(aTHX_ "The experimental declared_refs "
6894                              "feature is not enabled");
6895         Perl_ck_warner_d(aTHX_
6896              packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
6897             "Declaring references is experimental");
6898     }
6899     OPERATOR(MY);
6900 }
6901
6902 static int yyl_try(pTHX_ char*);
6903
6904 static bool
6905 yyl_eol_needs_semicolon(pTHX_ char **ps)
6906 {
6907     char *s = *ps;
6908     if (PL_lex_state != LEX_NORMAL
6909         || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
6910     {
6911         const bool in_comment = *s == '#';
6912         char *d;
6913         if (*s == '#' && s == PL_linestart && PL_in_eval
6914          && !PL_rsfp && !PL_parser->filtered) {
6915             /* handle eval qq[#line 1 "foo"\n ...] */
6916             CopLINE_dec(PL_curcop);
6917             incline(s, PL_bufend);
6918         }
6919         d = s;
6920         while (d < PL_bufend && *d != '\n')
6921             d++;
6922         if (d < PL_bufend)
6923             d++;
6924         s = d;
6925         if (in_comment && d == PL_bufend
6926             && PL_lex_state == LEX_INTERPNORMAL
6927             && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6928             && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
6929         else
6930             incline(s, PL_bufend);
6931         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
6932             PL_lex_state = LEX_FORMLINE;
6933             force_next(FORMRBRACK);
6934             *ps = s;
6935             return TRUE;
6936         }
6937     }
6938     else {
6939         while (s < PL_bufend && *s != '\n')
6940             s++;
6941         if (s < PL_bufend) {
6942             s++;
6943             if (s < PL_bufend)
6944                 incline(s, PL_bufend);
6945         }
6946     }
6947     *ps = s;
6948     return FALSE;
6949 }
6950
6951 static int
6952 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
6953 {
6954     char *d;
6955
6956     goto start;
6957
6958     do {
6959         fake_eof = 0;
6960         bof = cBOOL(PL_rsfp);
6961       start:
6962
6963         PL_bufptr = PL_bufend;
6964         COPLINE_INC_WITH_HERELINES;
6965         if (!lex_next_chunk(fake_eof)) {
6966             CopLINE_dec(PL_curcop);
6967             s = PL_bufptr;
6968             TOKEN(';'); /* not infinite loop because rsfp is NULL now */
6969         }
6970         CopLINE_dec(PL_curcop);
6971         s = PL_bufptr;
6972         /* If it looks like the start of a BOM or raw UTF-16,
6973          * check if it in fact is. */
6974         if (bof && PL_rsfp
6975             && (   *s == 0
6976                 || *(U8*)s == BOM_UTF8_FIRST_BYTE
6977                 || *(U8*)s >= 0xFE
6978                 || s[1] == 0))
6979         {
6980             Off_t offset = (IV)PerlIO_tell(PL_rsfp);
6981             bof = (offset == (Off_t)SvCUR(PL_linestr));
6982 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
6983             /* offset may include swallowed CR */
6984             if (!bof)
6985                 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6986 #endif
6987             if (bof) {
6988                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6989                 s = swallow_bom((U8*)s);
6990             }
6991         }
6992         if (PL_parser->in_pod) {
6993             /* Incest with pod. */
6994             if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
6995                 && !isALPHA(s[4]))
6996             {
6997                 SvPVCLEAR(PL_linestr);
6998                 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
6999                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7000                 PL_last_lop = PL_last_uni = NULL;
7001                 PL_parser->in_pod = 0;
7002             }
7003         }
7004         if (PL_rsfp || PL_parser->filtered)
7005             incline(s, PL_bufend);
7006     } while (PL_parser->in_pod);
7007
7008     PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7009     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7010     PL_last_lop = PL_last_uni = NULL;
7011     if (CopLINE(PL_curcop) == 1) {
7012         while (s < PL_bufend && isSPACE(*s))
7013             s++;
7014         if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7015             s++;
7016         d = NULL;
7017         if (!PL_in_eval) {
7018             if (*s == '#' && *(s+1) == '!')
7019                 d = s + 2;
7020 #ifdef ALTERNATE_SHEBANG
7021             else {
7022                 static char const as[] = ALTERNATE_SHEBANG;
7023                 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7024                     d = s + (sizeof(as) - 1);
7025             }
7026 #endif /* ALTERNATE_SHEBANG */
7027         }
7028         if (d) {
7029             char *ipath;
7030             char *ipathend;
7031
7032             while (isSPACE(*d))
7033                 d++;
7034             ipath = d;
7035             while (*d && !isSPACE(*d))
7036                 d++;
7037             ipathend = d;
7038
7039 #ifdef ARG_ZERO_IS_SCRIPT
7040             if (ipathend > ipath) {
7041                 /*
7042                  * HP-UX (at least) sets argv[0] to the script name,
7043                  * which makes $^X incorrect.  And Digital UNIX and Linux,
7044                  * at least, set argv[0] to the basename of the Perl
7045                  * interpreter. So, having found "#!", we'll set it right.
7046                  */
7047                 SV* copfilesv = CopFILESV(PL_curcop);
7048                 if (copfilesv) {
7049                     SV * const x =
7050                         GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7051                                          SVt_PV)); /* $^X */
7052                     assert(SvPOK(x) || SvGMAGICAL(x));
7053                     if (sv_eq(x, copfilesv)) {
7054                         sv_setpvn(x, ipath, ipathend - ipath);
7055                         SvSETMAGIC(x);
7056                     }
7057                     else {
7058                         STRLEN blen;
7059                         STRLEN llen;
7060                         const char *bstart = SvPV_const(copfilesv, blen);
7061                         const char * const lstart = SvPV_const(x, llen);
7062                         if (llen < blen) {
7063                             bstart += blen - llen;
7064                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7065                                 sv_setpvn(x, ipath, ipathend - ipath);
7066                                 SvSETMAGIC(x);
7067                             }
7068                         }
7069                     }
7070                 }
7071                 else {
7072                     /* Anything to do if no copfilesv? */
7073                 }
7074                 TAINT_NOT;      /* $^X is always tainted, but that's OK */
7075             }
7076 #endif /* ARG_ZERO_IS_SCRIPT */
7077
7078             /*
7079              * Look for options.
7080              */
7081             d = instr(s,"perl -");
7082             if (!d) {
7083                 d = instr(s,"perl");
7084 #if defined(DOSISH)
7085                 /* avoid getting into infinite loops when shebang
7086                  * line contains "Perl" rather than "perl" */
7087                 if (!d) {
7088                     for (d = ipathend-4; d >= ipath; --d) {
7089                         if (isALPHA_FOLD_EQ(*d, 'p')
7090                             && !ibcmp(d, "perl", 4))
7091                         {
7092                             break;
7093                         }
7094                     }
7095                     if (d < ipath)
7096                         d = NULL;
7097                 }
7098 #endif
7099             }
7100 #ifdef ALTERNATE_SHEBANG
7101             /*
7102              * If the ALTERNATE_SHEBANG on this system starts with a
7103              * character that can be part of a Perl expression, then if
7104              * we see it but not "perl", we're probably looking at the
7105              * start of Perl code, not a request to hand off to some
7106              * other interpreter.  Similarly, if "perl" is there, but
7107              * not in the first 'word' of the line, we assume the line
7108              * contains the start of the Perl program.
7109              */
7110             if (d && *s != '#') {
7111                 const char *c = ipath;
7112                 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7113                     c++;
7114                 if (c < d)
7115                     d = NULL;   /* "perl" not in first word; ignore */
7116                 else
7117                     *s = '#';   /* Don't try to parse shebang line */
7118             }
7119 #endif /* ALTERNATE_SHEBANG */
7120             if (!d
7121                 && *s == '#'
7122                 && ipathend > ipath
7123                 && !PL_minus_c
7124                 && !instr(s,"indir")
7125                 && instr(PL_origargv[0],"perl"))
7126             {
7127                 dVAR;
7128                 char **newargv;
7129
7130                 *ipathend = '\0';
7131                 s = ipathend + 1;
7132                 while (s < PL_bufend && isSPACE(*s))
7133                     s++;
7134                 if (s < PL_bufend) {
7135                     Newx(newargv,PL_origargc+3,char*);
7136                     newargv[1] = s;
7137                     while (s < PL_bufend && !isSPACE(*s))
7138                         s++;
7139                     *s = '\0';
7140                     Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7141                 }
7142                 else
7143                     newargv = PL_origargv;
7144                 newargv[0] = ipath;
7145                 PERL_FPU_PRE_EXEC
7146                 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7147                 PERL_FPU_POST_EXEC
7148                 Perl_croak(aTHX_ "Can't exec %s", ipath);
7149             }
7150             if (d) {
7151                 while (*d && !isSPACE(*d))
7152                     d++;
7153                 while (SPACE_OR_TAB(*d))
7154                     d++;
7155
7156                 if (*d++ == '-') {
7157                     const bool switches_done = PL_doswitches;
7158                     const U32 oldpdb = PL_perldb;
7159                     const bool oldn = PL_minus_n;
7160                     const bool oldp = PL_minus_p;
7161                     const char *d1 = d;
7162
7163                     do {
7164                         bool baduni = FALSE;
7165                         if (*d1 == 'C') {
7166                             const char *d2 = d1 + 1;
7167                             if (parse_unicode_opts((const char **)&d2)
7168                                 != PL_unicode)
7169                                 baduni = TRUE;
7170                         }
7171                         if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7172                             const char * const m = d1;
7173                             while (*d1 && !isSPACE(*d1))
7174                                 d1++;
7175                             Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7176                                   (int)(d1 - m), m);
7177                         }
7178                         d1 = moreswitches(d1);
7179                     } while (d1);
7180                     if (PL_doswitches && !switches_done) {
7181                         int argc = PL_origargc;
7182                         char **argv = PL_origargv;
7183                         do {
7184                             argc--,argv++;
7185                         } while (argc && argv[0][0] == '-' && argv[0][1]);
7186                         init_argv_symbols(argc,argv);
7187                     }
7188                     if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7189                         || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7190                           /* if we have already added "LINE: while (<>) {",
7191                              we must not do it again */
7192                     {
7193                         SvPVCLEAR(PL_linestr);
7194                         PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7195                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7196                         PL_last_lop = PL_last_uni = NULL;
7197                         PL_preambled = FALSE;
7198                         if (PERLDB_LINE_OR_SAVESRC)
7199                             (void)gv_fetchfile(PL_origfilename);
7200                         return YYL_RETRY;
7201                     }
7202                 }
7203             }
7204         }
7205     }
7206
7207     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7208         PL_lex_state = LEX_FORMLINE;
7209         force_next(FORMRBRACK);
7210         TOKEN(';');
7211     }
7212
7213     PL_bufptr = s;
7214     return YYL_RETRY;
7215 }
7216
7217 static int
7218 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7219 {
7220     CLINE;
7221     pl_yylval.opval
7222         = newSVOP(OP_CONST, 0,
7223                        S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7224     pl_yylval.opval->op_private = OPpCONST_BARE;
7225     TERM(BAREWORD);
7226 }
7227
7228 static int
7229 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7230 {
7231     if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7232         && PL_parser->saw_infix_sigil)
7233     {
7234         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7235                          "Operator or semicolon missing before %c%" UTF8f,
7236                          lastchar,
7237                          UTF8fARG(UTF, strlen(PL_tokenbuf),
7238                                   PL_tokenbuf));
7239         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7240                          "Ambiguous use of %c resolved as operator %c",
7241                          lastchar, lastchar);
7242     }
7243     TOKEN(BAREWORD);
7244 }
7245
7246 static int
7247 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7248 {
7249     if (sv) {
7250         op_free(rv2cv_op);
7251         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7252         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7253         if (SvTYPE(sv) == SVt_PVAV)
7254             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7255                                       pl_yylval.opval);
7256         else {
7257             pl_yylval.opval->op_private = 0;
7258             pl_yylval.opval->op_folded = 1;
7259             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7260         }
7261         TOKEN(BAREWORD);
7262     }
7263
7264     op_free(pl_yylval.opval);
7265     pl_yylval.opval =
7266         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7267     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7268     PL_last_lop = PL_oldbufptr;
7269     PL_last_lop_op = OP_ENTERSUB;
7270
7271     /* Is there a prototype? */
7272     if (SvPOK(cv)) {
7273         int k = yyl_subproto(aTHX_ s, cv);
7274         if (k != KEY_NULL)
7275             return k;
7276     }
7277
7278     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7279     PL_expect = XTERM;
7280     force_next(off ? PRIVATEREF : BAREWORD);
7281     if (!PL_lex_allbrackets
7282         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7283     {
7284         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7285     }
7286
7287     TOKEN(NOAMP);
7288 }
7289
7290 /* Honour "reserved word" warnings, and enforce strict subs */
7291 static void
7292 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7293 {
7294     /* after "print" and similar functions (corresponding to
7295      * "F? L" in opcode.pl), whatever wasn't already parsed as
7296      * a filehandle should be subject to "strict subs".
7297      * Likewise for the optional indirect-object argument to system
7298      * or exec, which can't be a bareword */
7299     if ((PL_last_lop_op == OP_PRINT
7300             || PL_last_lop_op == OP_PRTF
7301             || PL_last_lop_op == OP_SAY
7302             || PL_last_lop_op == OP_SYSTEM
7303             || PL_last_lop_op == OP_EXEC)
7304         && (PL_hints & HINT_STRICT_SUBS))
7305     {
7306         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7307     }
7308
7309     if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7310         char *d = PL_tokenbuf;
7311         while (isLOWER(*d))
7312             d++;
7313         if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7314             /* PL_warn_reserved is constant */
7315             GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7316             Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7317                         PL_tokenbuf);
7318             GCC_DIAG_RESTORE_STMT;
7319         }
7320     }
7321 }
7322
7323 static int
7324 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7325 {
7326     int pkgname = 0;
7327     const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7328     bool safebw;
7329     bool no_op_error = FALSE;
7330     /* Use this var to track whether intuit_method has been
7331        called.  intuit_method returns 0 or > 255.  */
7332     int key = 1;
7333
7334     if (PL_expect == XOPERATOR) {
7335         if (PL_bufptr == PL_linestart) {
7336             CopLINE_dec(PL_curcop);
7337             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7338             CopLINE_inc(PL_curcop);
7339         }
7340         else
7341             /* We want to call no_op with s pointing after the
7342                bareword, so defer it.  But we want it to come
7343                before the Bad name croak.  */
7344             no_op_error = TRUE;
7345     }
7346
7347     /* Get the rest if it looks like a package qualifier */
7348
7349     if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7350         STRLEN morelen;
7351         s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7352                       TRUE, &morelen);
7353         if (no_op_error) {
7354             no_op("Bareword",s);
7355             no_op_error = FALSE;
7356         }
7357         if (!morelen)
7358             Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7359                     UTF8fARG(UTF, len, PL_tokenbuf),
7360                     *s == '\'' ? "'" : "::");
7361         len += morelen;
7362         pkgname = 1;
7363     }
7364
7365     if (no_op_error)
7366         no_op("Bareword",s);
7367
7368     /* See if the name is "Foo::",
7369        in which case Foo is a bareword
7370        (and a package name). */
7371
7372     if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7373         if (ckWARN(WARN_BAREWORD)
7374             && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7375             Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7376                         "Bareword \"%" UTF8f
7377                         "\" refers to nonexistent package",
7378                         UTF8fARG(UTF, len, PL_tokenbuf));
7379         len -= 2;
7380         PL_tokenbuf[len] = '\0';
7381         c.gv = NULL;
7382         c.gvp = 0;
7383         safebw = TRUE;
7384     }
7385     else {
7386         safebw = FALSE;
7387     }
7388
7389     /* if we saw a global override before, get the right name */
7390
7391     if (!c.sv)
7392         c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7393     if (c.gvp) {
7394         SV *sv = newSVpvs("CORE::GLOBAL::");
7395         sv_catsv(sv, c.sv);
7396         SvREFCNT_dec(c.sv);
7397         c.sv = sv;
7398     }
7399
7400     /* Presume this is going to be a bareword of some sort. */
7401     CLINE;
7402     pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7403     pl_yylval.opval->op_private = OPpCONST_BARE;
7404
7405     /* And if "Foo::", then that's what it certainly is. */
7406     if (safebw)
7407         return yyl_safe_bareword(aTHX_ s, lastchar);
7408
7409     if (!c.off) {
7410         OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7411         const_op->op_private = OPpCONST_BARE;
7412         c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7413         c.cv = c.lex
7414             ? isGV(c.gv)
7415                 ? GvCV(c.gv)
7416                 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7417                     ? (CV *)SvRV(c.gv)
7418                     : ((CV *)c.gv)
7419             : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7420     }
7421
7422     /* See if it's the indirect object for a list operator. */
7423
7424     if (PL_oldoldbufptr
7425         && PL_oldoldbufptr < PL_bufptr
7426         && (PL_oldoldbufptr == PL_last_lop
7427             || PL_oldoldbufptr == PL_last_uni)
7428         && /* NO SKIPSPACE BEFORE HERE! */
7429            (PL_expect == XREF
7430             || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7431                                                    == OA_FILEREF))
7432     {
7433         bool immediate_paren = *s == '(';
7434         SSize_t s_off;
7435
7436         /* (Now we can afford to cross potential line boundary.) */
7437         s = skipspace(s);
7438
7439         /* intuit_method() can indirectly call lex_next_chunk(),
7440          * invalidating s
7441          */
7442         s_off = s - SvPVX(PL_linestr);
7443         /* Two barewords in a row may indicate method call. */
7444         if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7445                 || *s == '$')
7446             && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7447         {
7448             /* the code at method: doesn't use s */
7449             goto method;
7450         }
7451         s = SvPVX(PL_linestr) + s_off;
7452
7453         /* If not a declared subroutine, it's an indirect object. */
7454         /* (But it's an indir obj regardless for sort.) */
7455         /* Also, if "_" follows a filetest operator, it's a bareword */
7456
7457         if (
7458             ( !immediate_paren && (PL_last_lop_op == OP_SORT
7459              || (!c.cv
7460                  && (PL_last_lop_op != OP_MAPSTART
7461                      && PL_last_lop_op != OP_GREPSTART))))
7462            || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7463                 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7464                                                 == OA_FILESTATOP))
7465            )
7466         {
7467             PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7468             yyl_strictwarn_bareword(aTHX_ lastchar);
7469             op_free(c.rv2cv_op);
7470             return yyl_safe_bareword(aTHX_ s, lastchar);
7471         }
7472     }
7473
7474     PL_expect = XOPERATOR;
7475     s = skipspace(s);
7476
7477     /* Is this a word before a => operator? */
7478     if (*s == '=' && s[1] == '>' && !pkgname) {
7479         op_free(c.rv2cv_op);
7480         CLINE;
7481         if (c.gvp || (c.lex && !c.off)) {
7482             assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7483             /* This is our own scalar, created a few lines
7484                above, so this is safe. */
7485             SvREADONLY_off(c.sv);
7486             sv_setpv(c.sv, PL_tokenbuf);
7487             if (UTF && !IN_BYTES
7488              && is_utf8_string((U8*)PL_tokenbuf, len))
7489                   SvUTF8_on(c.sv);
7490             SvREADONLY_on(c.sv);
7491         }
7492         TERM(BAREWORD);
7493     }
7494
7495     /* If followed by a paren, it's certainly a subroutine. */
7496     if (*s == '(') {
7497         CLINE;
7498         if (c.cv) {
7499             char *d = s + 1;
7500             while (SPACE_OR_TAB(*d))
7501                 d++;
7502             if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7503                 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7504         }
7505         NEXTVAL_NEXTTOKE.opval =
7506             c.off ? c.rv2cv_op : pl_yylval.opval;
7507         if (c.off)
7508              op_free(pl_yylval.opval), force_next(PRIVATEREF);
7509         else op_free(c.rv2cv_op),      force_next(BAREWORD);
7510         pl_yylval.ival = 0;
7511         TOKEN('&');
7512     }
7513
7514     /* If followed by var or block, call it a method (unless sub) */
7515
7516     if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7517         op_free(c.rv2cv_op);
7518         PL_last_lop = PL_oldbufptr;
7519         PL_last_lop_op = OP_METHOD;
7520         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7521             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7522         PL_expect = XBLOCKTERM;
7523         PL_bufptr = s;
7524         return REPORT(METHOD);
7525     }
7526
7527     /* If followed by a bareword, see if it looks like indir obj. */
7528
7529     if (   key == 1
7530         && !orig_keyword
7531         && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7532         && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7533     {
7534       method:
7535         if (c.lex && !c.off) {
7536             assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7537             SvREADONLY_off(c.sv);
7538             sv_setpvn(c.sv, PL_tokenbuf, len);
7539             if (UTF && !IN_BYTES
7540              && is_utf8_string((U8*)PL_tokenbuf, len))
7541                 SvUTF8_on(c.sv);
7542             else SvUTF8_off(c.sv);
7543         }
7544         op_free(c.rv2cv_op);
7545         if (key == METHOD && !PL_lex_allbrackets
7546             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7547         {
7548             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7549         }
7550         return REPORT(key);
7551     }
7552
7553     /* Not a method, so call it a subroutine (if defined) */
7554
7555     if (c.cv) {
7556         /* Check for a constant sub */
7557         c.sv = cv_const_sv_or_av(c.cv);
7558         return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7559     }
7560
7561     /* Call it a bare word */
7562
7563     if (PL_hints & HINT_STRICT_SUBS)
7564         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7565     else
7566         yyl_strictwarn_bareword(aTHX_ lastchar);
7567
7568     op_free(c.rv2cv_op);
7569
7570     return yyl_safe_bareword(aTHX_ s, lastchar);
7571 }
7572
7573 static int
7574 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7575 {
7576     switch (key) {
7577     default:                    /* not a keyword */
7578         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7579
7580     case KEY___FILE__:
7581         FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7582
7583     case KEY___LINE__:
7584         FUN0OP(
7585             newSVOP(OP_CONST, 0,
7586                 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7587         );
7588
7589     case KEY___PACKAGE__:
7590         FUN0OP(
7591             newSVOP(OP_CONST, 0, (PL_curstash
7592                                      ? newSVhek(HvNAME_HEK(PL_curstash))
7593                                      : &PL_sv_undef))
7594         );
7595
7596     case KEY___DATA__:
7597     case KEY___END__:
7598         if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7599             yyl_data_handle(aTHX);
7600         return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7601
7602     case KEY___SUB__:
7603         FUN0OP(CvCLONE(PL_compcv)
7604                     ? newOP(OP_RUNCV, 0)
7605                     : newPVOP(OP_RUNCV,0,NULL));
7606
7607     case KEY_AUTOLOAD:
7608     case KEY_DESTROY:
7609     case KEY_BEGIN:
7610     case KEY_UNITCHECK:
7611     case KEY_CHECK:
7612     case KEY_INIT:
7613     case KEY_END:
7614         if (PL_expect == XSTATE)
7615             return yyl_sub(aTHX_ PL_bufptr, key);
7616         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7617
7618     case KEY_abs:
7619         UNI(OP_ABS);
7620
7621     case KEY_alarm:
7622         UNI(OP_ALARM);
7623
7624     case KEY_accept:
7625         LOP(OP_ACCEPT,XTERM);
7626
7627     case KEY_and:
7628         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7629             return REPORT(0);
7630         OPERATOR(ANDOP);
7631
7632     case KEY_atan2:
7633         LOP(OP_ATAN2,XTERM);
7634
7635     case KEY_bind:
7636         LOP(OP_BIND,XTERM);
7637
7638     case KEY_binmode:
7639         LOP(OP_BINMODE,XTERM);
7640
7641     case KEY_bless:
7642         LOP(OP_BLESS,XTERM);
7643
7644     case KEY_break:
7645         FUN0(OP_BREAK);
7646
7647     case KEY_chop:
7648         UNI(OP_CHOP);
7649
7650     case KEY_continue:
7651         /* We have to disambiguate the two senses of
7652           "continue". If the next token is a '{' then
7653           treat it as the start of a continue block;
7654           otherwise treat it as a control operator.
7655          */
7656         s = skipspace(s);
7657         if (*s == '{')
7658             PREBLOCK(CONTINUE);
7659         else
7660             FUN0(OP_CONTINUE);
7661
7662     case KEY_chdir:
7663         /* may use HOME */
7664         (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7665         UNI(OP_CHDIR);
7666
7667     case KEY_close:
7668         UNI(OP_CLOSE);
7669
7670     case KEY_closedir:
7671         UNI(OP_CLOSEDIR);
7672
7673     case KEY_cmp:
7674         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7675             return REPORT(0);
7676         NCEop(OP_SCMP);
7677
7678     case KEY_caller:
7679         UNI(OP_CALLER);
7680
7681     case KEY_crypt:
7682 #ifdef FCRYPT
7683         if (!PL_cryptseen) {
7684             PL_cryptseen = TRUE;
7685             init_des();
7686         }
7687 #endif
7688         LOP(OP_CRYPT,XTERM);
7689
7690     case KEY_chmod:
7691         LOP(OP_CHMOD,XTERM);
7692
7693     case KEY_chown:
7694         LOP(OP_CHOWN,XTERM);
7695
7696     case KEY_connect:
7697         LOP(OP_CONNECT,XTERM);
7698
7699     case KEY_chr:
7700         UNI(OP_CHR);
7701
7702     case KEY_cos:
7703         UNI(OP_COS);
7704
7705     case KEY_chroot:
7706         UNI(OP_CHROOT);
7707
7708     case KEY_default:
7709         PREBLOCK(DEFAULT);
7710
7711     case KEY_do:
7712         return yyl_do(aTHX_ s, orig_keyword);
7713
7714     case KEY_die:
7715         PL_hints |= HINT_BLOCK_SCOPE;
7716         LOP(OP_DIE,XTERM);
7717
7718     case KEY_defined:
7719         UNI(OP_DEFINED);
7720
7721     case KEY_delete:
7722         UNI(OP_DELETE);
7723
7724     case KEY_dbmopen:
7725         Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7726                           STR_WITH_LEN("NDBM_File::"),
7727                           STR_WITH_LEN("DB_File::"),
7728                           STR_WITH_LEN("GDBM_File::"),
7729                           STR_WITH_LEN("SDBM_File::"),
7730                           STR_WITH_LEN("ODBM_File::"),
7731                           NULL);
7732         LOP(OP_DBMOPEN,XTERM);
7733
7734     case KEY_dbmclose:
7735         UNI(OP_DBMCLOSE);
7736
7737     case KEY_dump:
7738         LOOPX(OP_DUMP);
7739
7740     case KEY_else:
7741         PREBLOCK(ELSE);
7742
7743     case KEY_elsif:
7744         pl_yylval.ival = CopLINE(PL_curcop);
7745         OPERATOR(ELSIF);
7746
7747     case KEY_eq:
7748         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7749             return REPORT(0);
7750         ChEop(OP_SEQ);
7751
7752     case KEY_exists:
7753         UNI(OP_EXISTS);
7754
7755     case KEY_exit:
7756         UNI(OP_EXIT);
7757
7758     case KEY_eval:
7759         s = skipspace(s);
7760         if (*s == '{') { /* block eval */
7761             PL_expect = XTERMBLOCK;
7762             UNIBRACK(OP_ENTERTRY);
7763         }
7764         else { /* string eval */
7765             PL_expect = XTERM;
7766             UNIBRACK(OP_ENTEREVAL);
7767         }
7768
7769     case KEY_evalbytes:
7770         PL_expect = XTERM;
7771         UNIBRACK(-OP_ENTEREVAL);
7772
7773     case KEY_eof:
7774         UNI(OP_EOF);
7775
7776     case KEY_exp:
7777         UNI(OP_EXP);
7778
7779     case KEY_each:
7780         UNI(OP_EACH);
7781
7782     case KEY_exec:
7783         LOP(OP_EXEC,XREF);
7784
7785     case KEY_endhostent:
7786         FUN0(OP_EHOSTENT);
7787
7788     case KEY_endnetent:
7789         FUN0(OP_ENETENT);
7790
7791     case KEY_endservent:
7792         FUN0(OP_ESERVENT);
7793
7794     case KEY_endprotoent:
7795         FUN0(OP_EPROTOENT);
7796
7797     case KEY_endpwent:
7798         FUN0(OP_EPWENT);
7799
7800     case KEY_endgrent:
7801         FUN0(OP_EGRENT);
7802
7803     case KEY_for:
7804     case KEY_foreach:
7805         return yyl_foreach(aTHX_ s);
7806
7807     case KEY_formline:
7808         LOP(OP_FORMLINE,XTERM);
7809
7810     case KEY_fork:
7811         FUN0(OP_FORK);
7812
7813     case KEY_fc:
7814         UNI(OP_FC);
7815
7816     case KEY_fcntl:
7817         LOP(OP_FCNTL,XTERM);
7818
7819     case KEY_fileno:
7820         UNI(OP_FILENO);
7821
7822     case KEY_flock:
7823         LOP(OP_FLOCK,XTERM);
7824
7825     case KEY_gt:
7826         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7827             return REPORT(0);
7828         ChRop(OP_SGT);
7829
7830     case KEY_ge:
7831         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7832             return REPORT(0);
7833         ChRop(OP_SGE);
7834
7835     case KEY_grep:
7836         LOP(OP_GREPSTART, XREF);
7837
7838     case KEY_goto:
7839         LOOPX(OP_GOTO);
7840
7841     case KEY_gmtime:
7842         UNI(OP_GMTIME);
7843
7844     case KEY_getc:
7845         UNIDOR(OP_GETC);
7846
7847     case KEY_getppid:
7848         FUN0(OP_GETPPID);
7849
7850     case KEY_getpgrp:
7851         UNI(OP_GETPGRP);
7852
7853     case KEY_getpriority:
7854         LOP(OP_GETPRIORITY,XTERM);
7855
7856     case KEY_getprotobyname:
7857         UNI(OP_GPBYNAME);
7858
7859     case KEY_getprotobynumber:
7860         LOP(OP_GPBYNUMBER,XTERM);
7861
7862     case KEY_getprotoent:
7863         FUN0(OP_GPROTOENT);
7864
7865     case KEY_getpwent:
7866         FUN0(OP_GPWENT);
7867
7868     case KEY_getpwnam:
7869         UNI(OP_GPWNAM);
7870
7871     case KEY_getpwuid:
7872         UNI(OP_GPWUID);
7873
7874     case KEY_getpeername:
7875         UNI(OP_GETPEERNAME);
7876
7877     case KEY_gethostbyname:
7878         UNI(OP_GHBYNAME);
7879
7880     case KEY_gethostbyaddr:
7881         LOP(OP_GHBYADDR,XTERM);
7882
7883     case KEY_gethostent:
7884         FUN0(OP_GHOSTENT);
7885
7886     case KEY_getnetbyname:
7887         UNI(OP_GNBYNAME);
7888
7889     case KEY_getnetbyaddr:
7890         LOP(OP_GNBYADDR,XTERM);
7891
7892     case KEY_getnetent:
7893         FUN0(OP_GNETENT);
7894
7895     case KEY_getservbyname:
7896         LOP(OP_GSBYNAME,XTERM);
7897
7898     case KEY_getservbyport:
7899         LOP(OP_GSBYPORT,XTERM);
7900
7901     case KEY_getservent:
7902         FUN0(OP_GSERVENT);
7903
7904     case KEY_getsockname:
7905         UNI(OP_GETSOCKNAME);
7906
7907     case KEY_getsockopt:
7908         LOP(OP_GSOCKOPT,XTERM);
7909
7910     case KEY_getgrent:
7911         FUN0(OP_GGRENT);
7912
7913     case KEY_getgrnam:
7914         UNI(OP_GGRNAM);
7915
7916     case KEY_getgrgid:
7917         UNI(OP_GGRGID);
7918
7919     case KEY_getlogin:
7920         FUN0(OP_GETLOGIN);
7921
7922     case KEY_given:
7923         pl_yylval.ival = CopLINE(PL_curcop);
7924         Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7925                          "given is experimental");
7926         OPERATOR(GIVEN);
7927
7928     case KEY_glob:
7929         LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
7930
7931     case KEY_hex:
7932         UNI(OP_HEX);
7933
7934     case KEY_if:
7935         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7936             return REPORT(0);
7937         pl_yylval.ival = CopLINE(PL_curcop);
7938         OPERATOR(IF);
7939
7940     case KEY_index:
7941         LOP(OP_INDEX,XTERM);
7942
7943     case KEY_int:
7944         UNI(OP_INT);
7945
7946     case KEY_ioctl:
7947         LOP(OP_IOCTL,XTERM);
7948
7949     case KEY_isa:
7950         Perl_ck_warner_d(aTHX_
7951             packWARN(WARN_EXPERIMENTAL__ISA), "isa is experimental");
7952         NCRop(OP_ISA);
7953
7954     case KEY_join:
7955         LOP(OP_JOIN,XTERM);
7956
7957     case KEY_keys:
7958         UNI(OP_KEYS);
7959
7960     case KEY_kill:
7961         LOP(OP_KILL,XTERM);
7962
7963     case KEY_last:
7964         LOOPX(OP_LAST);
7965
7966     case KEY_lc:
7967         UNI(OP_LC);
7968
7969     case KEY_lcfirst:
7970         UNI(OP_LCFIRST);
7971
7972     case KEY_local:
7973         OPERATOR(LOCAL);
7974
7975     case KEY_length:
7976         UNI(OP_LENGTH);
7977
7978     case KEY_lt:
7979         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7980             return REPORT(0);
7981         ChRop(OP_SLT);
7982
7983     case KEY_le:
7984         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7985             return REPORT(0);
7986         ChRop(OP_SLE);
7987
7988     case KEY_localtime:
7989         UNI(OP_LOCALTIME);
7990
7991     case KEY_log:
7992         UNI(OP_LOG);
7993
7994     case KEY_link:
7995         LOP(OP_LINK,XTERM);
7996
7997     case KEY_listen:
7998         LOP(OP_LISTEN,XTERM);
7999
8000     case KEY_lock:
8001         UNI(OP_LOCK);
8002
8003     case KEY_lstat:
8004         UNI(OP_LSTAT);
8005
8006     case KEY_m:
8007         s = scan_pat(s,OP_MATCH);
8008         TERM(sublex_start());
8009
8010     case KEY_map:
8011         LOP(OP_MAPSTART, XREF);
8012
8013     case KEY_mkdir:
8014         LOP(OP_MKDIR,XTERM);
8015
8016     case KEY_msgctl:
8017         LOP(OP_MSGCTL,XTERM);
8018
8019     case KEY_msgget:
8020         LOP(OP_MSGGET,XTERM);
8021
8022     case KEY_msgrcv:
8023         LOP(OP_MSGRCV,XTERM);
8024
8025     case KEY_msgsnd:
8026         LOP(OP_MSGSND,XTERM);
8027
8028     case KEY_our:
8029     case KEY_my:
8030     case KEY_state:
8031         return yyl_my(aTHX_ s, key);
8032
8033     case KEY_next:
8034         LOOPX(OP_NEXT);
8035
8036     case KEY_ne:
8037         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8038             return REPORT(0);
8039         ChEop(OP_SNE);
8040
8041     case KEY_no:
8042         s = tokenize_use(0, s);
8043         TOKEN(USE);
8044
8045     case KEY_not:
8046         if (*s == '(' || (s = skipspace(s), *s == '('))
8047             FUN1(OP_NOT);
8048         else {
8049             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8050                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8051             OPERATOR(NOTOP);
8052         }
8053
8054     case KEY_open:
8055         s = skipspace(s);
8056         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8057             const char *t;
8058             char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8059             for (t=d; isSPACE(*t);)
8060                 t++;
8061             if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8062                 /* [perl #16184] */
8063                 && !(t[0] == '=' && t[1] == '>')
8064                 && !(t[0] == ':' && t[1] == ':')
8065                 && !keyword(s, d-s, 0)
8066             ) {
8067                 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8068                    "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8069                     UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8070             }
8071         }
8072         LOP(OP_OPEN,XTERM);
8073
8074     case KEY_or:
8075         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8076             return REPORT(0);
8077         pl_yylval.ival = OP_OR;
8078         OPERATOR(OROP);
8079
8080     case KEY_ord:
8081         UNI(OP_ORD);
8082
8083     case KEY_oct:
8084         UNI(OP_OCT);
8085
8086     case KEY_opendir:
8087         LOP(OP_OPEN_DIR,XTERM);
8088
8089     case KEY_print:
8090         checkcomma(s,PL_tokenbuf,"filehandle");
8091         LOP(OP_PRINT,XREF);
8092
8093     case KEY_printf:
8094         checkcomma(s,PL_tokenbuf,"filehandle");
8095         LOP(OP_PRTF,XREF);
8096
8097     case KEY_prototype:
8098         UNI(OP_PROTOTYPE);
8099
8100     case KEY_push:
8101         LOP(OP_PUSH,XTERM);
8102
8103     case KEY_pop:
8104         UNIDOR(OP_POP);
8105
8106     case KEY_pos:
8107         UNIDOR(OP_POS);
8108
8109     case KEY_pack:
8110         LOP(OP_PACK,XTERM);
8111
8112     case KEY_package:
8113         s = force_word(s,BAREWORD,FALSE,TRUE);
8114         s = skipspace(s);
8115         s = force_strict_version(s);
8116         PREBLOCK(PACKAGE);
8117
8118     case KEY_pipe:
8119         LOP(OP_PIPE_OP,XTERM);
8120
8121     case KEY_q:
8122         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8123         if (!s)
8124             missingterm(NULL, 0);
8125         COPLINE_SET_FROM_MULTI_END;
8126         pl_yylval.ival = OP_CONST;
8127         TERM(sublex_start());
8128
8129     case KEY_quotemeta:
8130         UNI(OP_QUOTEMETA);
8131
8132     case KEY_qw:
8133         return yyl_qw(aTHX_ s, len);
8134
8135     case KEY_qq:
8136         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8137         if (!s)
8138             missingterm(NULL, 0);
8139         pl_yylval.ival = OP_STRINGIFY;
8140         if (SvIVX(PL_lex_stuff) == '\'')
8141             SvIV_set(PL_lex_stuff, 0);  /* qq'$foo' should interpolate */
8142         TERM(sublex_start());
8143
8144     case KEY_qr:
8145         s = scan_pat(s,OP_QR);
8146         TERM(sublex_start());
8147
8148     case KEY_qx:
8149         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8150         if (!s)
8151             missingterm(NULL, 0);
8152         pl_yylval.ival = OP_BACKTICK;
8153         TERM(sublex_start());
8154
8155     case KEY_return:
8156         OLDLOP(OP_RETURN);
8157
8158     case KEY_require:
8159         return yyl_require(aTHX_ s, orig_keyword);
8160
8161     case KEY_reset:
8162         UNI(OP_RESET);
8163
8164     case KEY_redo:
8165         LOOPX(OP_REDO);
8166
8167     case KEY_rename:
8168         LOP(OP_RENAME,XTERM);
8169
8170     case KEY_rand:
8171         UNI(OP_RAND);
8172
8173     case KEY_rmdir:
8174         UNI(OP_RMDIR);
8175
8176     case KEY_rindex:
8177         LOP(OP_RINDEX,XTERM);
8178
8179     case KEY_read:
8180         LOP(OP_READ,XTERM);
8181
8182     case KEY_readdir:
8183         UNI(OP_READDIR);
8184
8185     case KEY_readline:
8186         UNIDOR(OP_READLINE);
8187
8188     case KEY_readpipe:
8189         UNIDOR(OP_BACKTICK);
8190
8191     case KEY_rewinddir:
8192         UNI(OP_REWINDDIR);
8193
8194     case KEY_recv:
8195         LOP(OP_RECV,XTERM);
8196
8197     case KEY_reverse:
8198         LOP(OP_REVERSE,XTERM);
8199
8200     case KEY_readlink:
8201         UNIDOR(OP_READLINK);
8202
8203     case KEY_ref:
8204         UNI(OP_REF);
8205
8206     case KEY_s:
8207         s = scan_subst(s);
8208         if (pl_yylval.opval)
8209             TERM(sublex_start());
8210         else
8211             TOKEN(1);   /* force error */
8212
8213     case KEY_say:
8214         checkcomma(s,PL_tokenbuf,"filehandle");
8215         LOP(OP_SAY,XREF);
8216
8217     case KEY_chomp:
8218         UNI(OP_CHOMP);
8219
8220     case KEY_scalar:
8221         UNI(OP_SCALAR);
8222
8223     case KEY_select:
8224         LOP(OP_SELECT,XTERM);
8225
8226     case KEY_seek:
8227         LOP(OP_SEEK,XTERM);
8228
8229     case KEY_semctl:
8230         LOP(OP_SEMCTL,XTERM);
8231
8232     case KEY_semget:
8233         LOP(OP_SEMGET,XTERM);
8234
8235     case KEY_semop:
8236         LOP(OP_SEMOP,XTERM);
8237
8238     case KEY_send:
8239         LOP(OP_SEND,XTERM);
8240
8241     case KEY_setpgrp:
8242         LOP(OP_SETPGRP,XTERM);
8243
8244     case KEY_setpriority:
8245         LOP(OP_SETPRIORITY,XTERM);
8246
8247     case KEY_sethostent:
8248         UNI(OP_SHOSTENT);
8249
8250     case KEY_setnetent:
8251         UNI(OP_SNETENT);
8252
8253     case KEY_setservent:
8254         UNI(OP_SSERVENT);
8255
8256     case KEY_setprotoent:
8257         UNI(OP_SPROTOENT);
8258
8259     case KEY_setpwent:
8260         FUN0(OP_SPWENT);
8261
8262     case KEY_setgrent:
8263         FUN0(OP_SGRENT);
8264
8265     case KEY_seekdir:
8266         LOP(OP_SEEKDIR,XTERM);
8267
8268     case KEY_setsockopt:
8269         LOP(OP_SSOCKOPT,XTERM);
8270
8271     case KEY_shift:
8272         UNIDOR(OP_SHIFT);
8273
8274     case KEY_shmctl:
8275         LOP(OP_SHMCTL,XTERM);
8276
8277     case KEY_shmget:
8278         LOP(OP_SHMGET,XTERM);
8279
8280     case KEY_shmread:
8281         LOP(OP_SHMREAD,XTERM);
8282
8283     case KEY_shmwrite:
8284         LOP(OP_SHMWRITE,XTERM);
8285
8286     case KEY_shutdown:
8287         LOP(OP_SHUTDOWN,XTERM);
8288
8289     case KEY_sin:
8290         UNI(OP_SIN);
8291
8292     case KEY_sleep:
8293         UNI(OP_SLEEP);
8294
8295     case KEY_socket:
8296         LOP(OP_SOCKET,XTERM);
8297
8298     case KEY_socketpair:
8299         LOP(OP_SOCKPAIR,XTERM);
8300
8301     case KEY_sort:
8302         checkcomma(s,PL_tokenbuf,"subroutine name");
8303         s = skipspace(s);
8304         PL_expect = XTERM;
8305         s = force_word(s,BAREWORD,TRUE,TRUE);
8306         LOP(OP_SORT,XREF);
8307
8308     case KEY_split:
8309         LOP(OP_SPLIT,XTERM);
8310
8311     case KEY_sprintf:
8312         LOP(OP_SPRINTF,XTERM);
8313
8314     case KEY_splice:
8315         LOP(OP_SPLICE,XTERM);
8316
8317     case KEY_sqrt:
8318         UNI(OP_SQRT);
8319
8320     case KEY_srand:
8321         UNI(OP_SRAND);
8322
8323     case KEY_stat:
8324         UNI(OP_STAT);
8325
8326     case KEY_study:
8327         UNI(OP_STUDY);
8328
8329     case KEY_substr:
8330         LOP(OP_SUBSTR,XTERM);
8331
8332     case KEY_format:
8333     case KEY_sub:
8334         return yyl_sub(aTHX_ s, key);
8335
8336     case KEY_system:
8337         LOP(OP_SYSTEM,XREF);
8338
8339     case KEY_symlink:
8340         LOP(OP_SYMLINK,XTERM);
8341
8342     case KEY_syscall:
8343         LOP(OP_SYSCALL,XTERM);
8344
8345     case KEY_sysopen:
8346         LOP(OP_SYSOPEN,XTERM);
8347
8348     case KEY_sysseek:
8349         LOP(OP_SYSSEEK,XTERM);
8350
8351     case KEY_sysread:
8352         LOP(OP_SYSREAD,XTERM);
8353
8354     case KEY_syswrite:
8355         LOP(OP_SYSWRITE,XTERM);
8356
8357     case KEY_tr:
8358     case KEY_y:
8359         s = scan_trans(s);
8360         TERM(sublex_start());
8361
8362     case KEY_tell:
8363         UNI(OP_TELL);
8364
8365     case KEY_telldir:
8366         UNI(OP_TELLDIR);
8367
8368     case KEY_tie:
8369         LOP(OP_TIE,XTERM);
8370
8371     case KEY_tied:
8372         UNI(OP_TIED);
8373
8374     case KEY_time:
8375         FUN0(OP_TIME);
8376
8377     case KEY_times:
8378         FUN0(OP_TMS);
8379
8380     case KEY_truncate:
8381         LOP(OP_TRUNCATE,XTERM);
8382
8383     case KEY_uc:
8384         UNI(OP_UC);
8385
8386     case KEY_ucfirst:
8387         UNI(OP_UCFIRST);
8388
8389     case KEY_untie:
8390         UNI(OP_UNTIE);
8391
8392     case KEY_until:
8393         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8394             return REPORT(0);
8395         pl_yylval.ival = CopLINE(PL_curcop);
8396         OPERATOR(UNTIL);
8397
8398     case KEY_unless:
8399         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8400             return REPORT(0);
8401         pl_yylval.ival = CopLINE(PL_curcop);
8402         OPERATOR(UNLESS);
8403
8404     case KEY_unlink:
8405         LOP(OP_UNLINK,XTERM);
8406
8407     case KEY_undef:
8408         UNIDOR(OP_UNDEF);
8409
8410     case KEY_unpack:
8411         LOP(OP_UNPACK,XTERM);
8412
8413     case KEY_utime:
8414         LOP(OP_UTIME,XTERM);
8415
8416     case KEY_umask:
8417         UNIDOR(OP_UMASK);
8418
8419     case KEY_unshift:
8420         LOP(OP_UNSHIFT,XTERM);
8421
8422     case KEY_use:
8423         s = tokenize_use(1, s);
8424         TOKEN(USE);
8425
8426     case KEY_values:
8427         UNI(OP_VALUES);
8428
8429     case KEY_vec:
8430         LOP(OP_VEC,XTERM);
8431
8432     case KEY_when:
8433         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8434             return REPORT(0);
8435         pl_yylval.ival = CopLINE(PL_curcop);
8436         Perl_ck_warner_d(aTHX_
8437             packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8438             "when is experimental");
8439         OPERATOR(WHEN);
8440
8441     case KEY_while:
8442         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8443             return REPORT(0);
8444         pl_yylval.ival = CopLINE(PL_curcop);
8445         OPERATOR(WHILE);
8446
8447     case KEY_warn:
8448         PL_hints |= HINT_BLOCK_SCOPE;
8449         LOP(OP_WARN,XTERM);
8450
8451     case KEY_wait:
8452         FUN0(OP_WAIT);
8453
8454     case KEY_waitpid:
8455         LOP(OP_WAITPID,XTERM);
8456
8457     case KEY_wantarray:
8458         FUN0(OP_WANTARRAY);
8459
8460     case KEY_write:
8461         /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8462          * we use the same number on EBCDIC */
8463         gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8464         UNI(OP_ENTERWRITE);
8465
8466     case KEY_x:
8467         if (PL_expect == XOPERATOR) {
8468             if (*s == '=' && !PL_lex_allbrackets
8469                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8470             {
8471                 return REPORT(0);
8472             }
8473             Mop(OP_REPEAT);
8474         }
8475         check_uni();
8476         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8477
8478     case KEY_xor:
8479         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8480             return REPORT(0);
8481         pl_yylval.ival = OP_XOR;
8482         OPERATOR(OROP);
8483     }
8484 }
8485
8486 static int
8487 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8488 {
8489     I32 key = 0;
8490     I32 orig_keyword = 0;
8491     STRLEN olen = len;
8492     char *d = s;
8493     s += 2;
8494     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8495     if ((*s == ':' && s[1] == ':')
8496         || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8497     {
8498         Copy(PL_bufptr, PL_tokenbuf, olen, char);
8499         return yyl_just_a_word(aTHX_ d, olen, 0, c);
8500     }
8501     if (!key)
8502         Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8503                           UTF8fARG(UTF, len, PL_tokenbuf));
8504     if (key < 0)
8505         key = -key;
8506     else if (key == KEY_require || key == KEY_do
8507           || key == KEY_glob)
8508         /* that's a way to remember we saw "CORE::" */
8509         orig_keyword = key;
8510
8511     /* Known to be a reserved word at this point */
8512     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8513 }
8514
8515 static int
8516 yyl_keylookup(pTHX_ char *s, GV *gv)
8517 {
8518     dVAR;
8519     STRLEN len;
8520     bool anydelim;
8521     I32 key;
8522     struct code c = no_code;
8523     I32 orig_keyword = 0;
8524     char *d;
8525
8526     c.gv = gv;
8527
8528     PL_bufptr = s;
8529     s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8530
8531     /* Some keywords can be followed by any delimiter, including ':' */
8532     anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8533
8534     /* x::* is just a word, unless x is "CORE" */
8535     if (!anydelim && *s == ':' && s[1] == ':') {
8536         if (memEQs(PL_tokenbuf, len, "CORE"))
8537             return yyl_key_core(aTHX_ s, len, c);
8538         return yyl_just_a_word(aTHX_ s, len, 0, c);
8539     }
8540
8541     d = s;
8542     while (d < PL_bufend && isSPACE(*d))
8543             d++;        /* no comments skipped here, or s### is misparsed */
8544
8545     /* Is this a word before a => operator? */
8546     if (*d == '=' && d[1] == '>') {
8547         return yyl_fatcomma(aTHX_ s, len);
8548     }
8549
8550     /* Check for plugged-in keyword */
8551     {
8552         OP *o;
8553         int result;
8554         char *saved_bufptr = PL_bufptr;
8555         PL_bufptr = s;
8556         result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8557         s = PL_bufptr;
8558         if (result == KEYWORD_PLUGIN_DECLINE) {
8559             /* not a plugged-in keyword */
8560             PL_bufptr = saved_bufptr;
8561         } else if (result == KEYWORD_PLUGIN_STMT) {
8562             pl_yylval.opval = o;
8563             CLINE;
8564             if (!PL_nexttoke) PL_expect = XSTATE;
8565             return REPORT(PLUGSTMT);
8566         } else if (result == KEYWORD_PLUGIN_EXPR) {
8567             pl_yylval.opval = o;
8568             CLINE;
8569             if (!PL_nexttoke) PL_expect = XOPERATOR;
8570             return REPORT(PLUGEXPR);
8571         } else {
8572             Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8573         }
8574     }
8575
8576     /* Is this a label? */
8577     if (!anydelim && PL_expect == XSTATE
8578           && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8579         s = d + 1;
8580         pl_yylval.opval =
8581             newSVOP(OP_CONST, 0,
8582                 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8583         CLINE;
8584         TOKEN(LABEL);
8585     }
8586
8587     /* Check for lexical sub */
8588     if (PL_expect != XOPERATOR) {
8589         char tmpbuf[sizeof PL_tokenbuf + 1];
8590         *tmpbuf = '&';
8591         Copy(PL_tokenbuf, tmpbuf+1, len, char);
8592         c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8593         if (c.off != NOT_IN_PAD) {
8594             assert(c.off); /* we assume this is boolean-true below */
8595             if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8596                 HV *  const stash = PAD_COMPNAME_OURSTASH(c.off);
8597                 HEK * const stashname = HvNAME_HEK(stash);
8598                 c.sv = newSVhek(stashname);
8599                 sv_catpvs(c.sv, "::");
8600                 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8601                                 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8602                 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8603                                   SVt_PVCV);
8604                 c.off = 0;
8605                 if (!c.gv) {
8606                     sv_free(c.sv);
8607                     c.sv = NULL;
8608                     return yyl_just_a_word(aTHX_ s, len, 0, c);
8609                 }
8610             }
8611             else {
8612                 c.rv2cv_op = newOP(OP_PADANY, 0);
8613                 c.rv2cv_op->op_targ = c.off;
8614                 c.cv = find_lexical_cv(c.off);
8615             }
8616             c.lex = TRUE;
8617             return yyl_just_a_word(aTHX_ s, len, 0, c);
8618         }
8619         c.off = 0;
8620     }
8621
8622     /* Check for built-in keyword */
8623     key = keyword(PL_tokenbuf, len, 0);
8624
8625     if (key < 0)
8626         key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8627
8628     if (key && key != KEY___DATA__ && key != KEY___END__
8629      && (!anydelim || *s != '#')) {
8630         /* no override, and not s### either; skipspace is safe here
8631          * check for => on following line */
8632         bool arrow;
8633         STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8634         STRLEN   soff = s         - SvPVX(PL_linestr);
8635         s = peekspace(s);
8636         arrow = *s == '=' && s[1] == '>';
8637         PL_bufptr = SvPVX(PL_linestr) + bufoff;
8638         s         = SvPVX(PL_linestr) +   soff;
8639         if (arrow)
8640             return yyl_fatcomma(aTHX_ s, len);
8641     }
8642
8643     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8644 }
8645
8646 static int
8647 yyl_try(pTHX_ char *s)
8648 {
8649     char *d;
8650     GV *gv = NULL;
8651     int tok;
8652
8653   retry:
8654     switch (*s) {
8655     default:
8656         if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8657             if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8658                 return tok;
8659             goto retry_bufptr;
8660         }
8661         yyl_croak_unrecognised(aTHX_ s);
8662
8663     case 4:
8664     case 26:
8665         /* emulate EOF on ^D or ^Z */
8666         if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8667             return tok;
8668     retry_bufptr:
8669         s = PL_bufptr;
8670         goto retry;
8671
8672     case 0:
8673         if ((!PL_rsfp || PL_lex_inwhat)
8674          && (!PL_parser->filtered || s+1 < PL_bufend)) {
8675             PL_last_uni = 0;
8676             PL_last_lop = 0;
8677             if (PL_lex_brackets
8678                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8679             {
8680                 yyerror((const char *)
8681                         (PL_lex_formbrack
8682                          ? "Format not terminated"
8683                          : "Missing right curly or square bracket"));
8684             }
8685             DEBUG_T({
8686                 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8687             });
8688             TOKEN(0);
8689         }
8690         if (s++ < PL_bufend)
8691             goto retry;  /* ignore stray nulls */
8692         PL_last_uni = 0;
8693         PL_last_lop = 0;
8694         if (!PL_in_eval && !PL_preambled) {
8695             PL_preambled = TRUE;
8696             if (PL_perldb) {
8697                 /* Generate a string of Perl code to load the debugger.
8698                  * If PERL5DB is set, it will return the contents of that,
8699                  * otherwise a compile-time require of perl5db.pl.  */
8700
8701                 const char * const pdb = PerlEnv_getenv("PERL5DB");
8702
8703                 if (pdb) {
8704                     sv_setpv(PL_linestr, pdb);
8705                     sv_catpvs(PL_linestr,";");
8706                 } else {
8707                     SETERRNO(0,SS_NORMAL);
8708                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8709                 }
8710                 PL_parser->preambling = CopLINE(PL_curcop);
8711             } else
8712                 SvPVCLEAR(PL_linestr);
8713             if (PL_preambleav) {
8714                 SV **svp = AvARRAY(PL_preambleav);
8715                 SV **const end = svp + AvFILLp(PL_preambleav);
8716                 while(svp <= end) {
8717                     sv_catsv(PL_linestr, *svp);
8718                     ++svp;
8719                     sv_catpvs(PL_linestr, ";");
8720                 }
8721                 sv_free(MUTABLE_SV(PL_preambleav));
8722                 PL_preambleav = NULL;
8723             }
8724             if (PL_minus_E)
8725                 sv_catpvs(PL_linestr,
8726                           "use feature ':5." STRINGIFY(PERL_VERSION) "';");
8727             if (PL_minus_n || PL_minus_p) {
8728                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8729                 if (PL_minus_l)
8730                     sv_catpvs(PL_linestr,"chomp;");
8731                 if (PL_minus_a) {
8732                     if (PL_minus_F) {
8733                         if (   (   *PL_splitstr == '/'
8734                                 || *PL_splitstr == '\''
8735                                 || *PL_splitstr == '"')
8736                             && strchr(PL_splitstr + 1, *PL_splitstr))
8737                         {
8738                             /* strchr is ok, because -F pattern can't contain
8739                              * embeddded NULs */
8740                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8741                         }
8742                         else {
8743                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8744                                bytes can be used as quoting characters.  :-) */
8745                             const char *splits = PL_splitstr;
8746                             sv_catpvs(PL_linestr, "our @F=split(q\0");
8747                             do {
8748                                 /* Need to \ \s  */
8749                                 if (*splits == '\\')
8750                                     sv_catpvn(PL_linestr, splits, 1);
8751                                 sv_catpvn(PL_linestr, splits, 1);
8752                             } while (*splits++);
8753                             /* This loop will embed the trailing NUL of
8754                                PL_linestr as the last thing it does before
8755                                terminating.  */
8756                             sv_catpvs(PL_linestr, ");");
8757                         }
8758                     }
8759                     else
8760                         sv_catpvs(PL_linestr,"our @F=split(' ');");
8761                 }
8762             }
8763             sv_catpvs(PL_linestr, "\n");
8764             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8765             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8766             PL_last_lop = PL_last_uni = NULL;
8767             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8768                 update_debugger_info(PL_linestr, NULL, 0);
8769             goto retry;
8770         }
8771         if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8772             return tok;
8773         goto retry_bufptr;
8774
8775     case '\r':
8776 #ifdef PERL_STRICT_CR
8777         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8778         Perl_croak(aTHX_
8779       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8780 #endif
8781     case ' ': case '\t': case '\f': case '\v':
8782         s++;
8783         goto retry;
8784
8785     case '#':
8786     case '\n': {
8787         const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8788         if (needs_semicolon)
8789             TOKEN(';');
8790         else
8791             goto retry;
8792     }
8793
8794     case '-':
8795         return yyl_hyphen(aTHX_ s);
8796
8797     case '+':
8798         return yyl_plus(aTHX_ s);
8799
8800     case '*':
8801         return yyl_star(aTHX_ s);
8802
8803     case '%':
8804         return yyl_percent(aTHX_ s);
8805
8806     case '^':
8807         return yyl_caret(aTHX_ s);
8808
8809     case '[':
8810         return yyl_leftsquare(aTHX_ s);
8811
8812     case '~':
8813         return yyl_tilde(aTHX_ s);
8814
8815     case ',':
8816         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8817             TOKEN(0);
8818         s++;
8819         OPERATOR(',');
8820     case ':':
8821         if (s[1] == ':')
8822             return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
8823         return yyl_colon(aTHX_ s + 1);
8824
8825     case '(':
8826         return yyl_leftparen(aTHX_ s + 1);
8827
8828     case ';':
8829         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8830             TOKEN(0);
8831         CLINE;
8832         s++;
8833         PL_expect = XSTATE;
8834         TOKEN(';');
8835
8836     case ')':
8837         return yyl_rightparen(aTHX_ s);
8838
8839     case ']':
8840         return yyl_rightsquare(aTHX_ s);
8841
8842     case '{':
8843         return yyl_leftcurly(aTHX_ s + 1, 0);
8844
8845     case '}':
8846         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
8847             TOKEN(0);
8848         return yyl_rightcurly(aTHX_ s, 0);
8849
8850     case '&':
8851         return yyl_ampersand(aTHX_ s);
8852
8853     case '|':
8854         return yyl_verticalbar(aTHX_ s);
8855
8856     case '=':
8857         if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
8858             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
8859         {
8860             s = vcs_conflict_marker(s + 7);
8861             goto retry;
8862         }
8863
8864         s++;
8865         {
8866             const char tmp = *s++;
8867             if (tmp == '=') {
8868                 if (!PL_lex_allbrackets
8869                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8870                 {
8871                     s -= 2;
8872                     TOKEN(0);
8873                 }
8874                 ChEop(OP_EQ);
8875             }
8876             if (tmp == '>') {
8877                 if (!PL_lex_allbrackets
8878                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8879                 {
8880                     s -= 2;
8881                     TOKEN(0);
8882                 }
8883                 OPERATOR(',');
8884             }
8885             if (tmp == '~')
8886                 PMop(OP_MATCH);
8887             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
8888                 && memCHRs("+-*/%.^&|<",tmp))
8889                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8890                             "Reversed %c= operator",(int)tmp);
8891             s--;
8892             if (PL_expect == XSTATE
8893                 && isALPHA(tmp)
8894                 && (s == PL_linestart+1 || s[-2] == '\n') )
8895             {
8896                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
8897                     || PL_lex_state != LEX_NORMAL)
8898                 {
8899                     d = PL_bufend;
8900                     while (s < d) {
8901                         if (*s++ == '\n') {
8902                             incline(s, PL_bufend);
8903                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
8904                             {
8905                                 s = (char *) memchr(s,'\n', d - s);
8906                                 if (s)
8907                                     s++;
8908                                 else
8909                                     s = d;
8910                                 incline(s, PL_bufend);
8911                                 goto retry;
8912                             }
8913                         }
8914                     }
8915                     goto retry;
8916                 }
8917                 s = PL_bufend;
8918                 PL_parser->in_pod = 1;
8919                 goto retry;
8920             }
8921         }
8922         if (PL_expect == XBLOCK) {
8923             const char *t = s;
8924 #ifdef PERL_STRICT_CR
8925             while (SPACE_OR_TAB(*t))
8926 #else
8927             while (SPACE_OR_TAB(*t) || *t == '\r')
8928 #endif
8929                 t++;
8930             if (*t == '\n' || *t == '#') {
8931                 ENTER_with_name("lex_format");
8932                 SAVEI8(PL_parser->form_lex_state);
8933                 SAVEI32(PL_lex_formbrack);
8934                 PL_parser->form_lex_state = PL_lex_state;
8935                 PL_lex_formbrack = PL_lex_brackets + 1;
8936                 PL_parser->sub_error_count = PL_error_count;
8937                 return yyl_leftcurly(aTHX_ s, 1);
8938             }
8939         }
8940         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
8941             s--;
8942             TOKEN(0);
8943         }
8944         pl_yylval.ival = 0;
8945         OPERATOR(ASSIGNOP);
8946
8947     case '!':
8948         return yyl_bang(aTHX_ s + 1);
8949
8950     case '<':
8951         if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
8952             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
8953         {
8954             s = vcs_conflict_marker(s + 7);
8955             goto retry;
8956         }
8957         return yyl_leftpointy(aTHX_ s);
8958
8959     case '>':
8960         if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
8961             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
8962         {
8963             s = vcs_conflict_marker(s + 7);
8964             goto retry;
8965         }
8966         return yyl_rightpointy(aTHX_ s + 1);
8967
8968     case '$':
8969         return yyl_dollar(aTHX_ s);
8970
8971     case '@':
8972         return yyl_snail(aTHX_ s);
8973
8974     case '/':                   /* may be division, defined-or, or pattern */
8975         return yyl_slash(aTHX_ s);
8976
8977      case '?':                  /* conditional */
8978         s++;
8979         if (!PL_lex_allbrackets
8980             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
8981         {
8982             s--;
8983             TOKEN(0);
8984         }
8985         PL_lex_allbrackets++;
8986         OPERATOR('?');
8987
8988     case '.':
8989         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
8990 #ifdef PERL_STRICT_CR
8991             && s[1] == '\n'
8992 #else
8993             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
8994 #endif
8995             && (s == PL_linestart || s[-1] == '\n') )
8996         {
8997             PL_expect = XSTATE;
8998             /* formbrack==2 means dot seen where arguments expected */
8999             return yyl_rightcurly(aTHX_ s, 2);
9000         }
9001         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9002             s += 3;
9003             OPERATOR(YADAYADA);
9004         }
9005         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9006             char tmp = *s++;
9007             if (*s == tmp) {
9008                 if (!PL_lex_allbrackets
9009                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9010                 {
9011                     s--;
9012                     TOKEN(0);
9013                 }
9014                 s++;
9015                 if (*s == tmp) {
9016                     s++;
9017                     pl_yylval.ival = OPf_SPECIAL;
9018                 }
9019                 else
9020                     pl_yylval.ival = 0;
9021                 OPERATOR(DOTDOT);
9022             }
9023             if (*s == '=' && !PL_lex_allbrackets
9024                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9025             {
9026                 s--;
9027                 TOKEN(0);
9028             }
9029             Aop(OP_CONCAT);
9030         }
9031         /* FALLTHROUGH */
9032     case '0': case '1': case '2': case '3': case '4':
9033     case '5': case '6': case '7': case '8': case '9':
9034         s = scan_num(s, &pl_yylval);
9035         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9036         if (PL_expect == XOPERATOR)
9037             no_op("Number",s);
9038         TERM(THING);
9039
9040     case '\'':
9041         return yyl_sglquote(aTHX_ s);
9042
9043     case '"':
9044         return yyl_dblquote(aTHX_ s);
9045
9046     case '`':
9047         return yyl_backtick(aTHX_ s);
9048
9049     case '\\':
9050         return yyl_backslash(aTHX_ s + 1);
9051
9052     case 'v':
9053         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9054             char *start = s + 2;
9055             while (isDIGIT(*start) || *start == '_')
9056                 start++;
9057             if (*start == '.' && isDIGIT(start[1])) {
9058                 s = scan_num(s, &pl_yylval);
9059                 TERM(THING);
9060             }
9061             else if ((*start == ':' && start[1] == ':')
9062                      || (PL_expect == XSTATE && *start == ':')) {
9063                 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9064                     return tok;
9065                 goto retry_bufptr;
9066             }
9067             else if (PL_expect == XSTATE) {
9068                 d = start;
9069                 while (d < PL_bufend && isSPACE(*d)) d++;
9070                 if (*d == ':') {
9071                     if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9072                         return tok;
9073                     goto retry_bufptr;
9074                 }
9075             }
9076             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9077             if (!isALPHA(*start) && (PL_expect == XTERM
9078                         || PL_expect == XREF || PL_expect == XSTATE
9079                         || PL_expect == XTERMORDORDOR)) {
9080                 GV *const gv = gv_fetchpvn_flags(s, start - s,
9081                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
9082                 if (!gv) {
9083                     s = scan_num(s, &pl_yylval);
9084                     TERM(THING);
9085                 }
9086             }
9087         }
9088         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9089             return tok;
9090         goto retry_bufptr;
9091
9092     case 'x':
9093         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9094             s++;
9095             Mop(OP_REPEAT);
9096         }
9097         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9098             return tok;
9099         goto retry_bufptr;
9100
9101     case '_':
9102     case 'a': case 'A':
9103     case 'b': case 'B':
9104     case 'c': case 'C':
9105     case 'd': case 'D':
9106     case 'e': case 'E':
9107     case 'f': case 'F':
9108     case 'g': case 'G':
9109     case 'h': case 'H':
9110     case 'i': case 'I':
9111     case 'j': case 'J':
9112     case 'k': case 'K':
9113     case 'l': case 'L':
9114     case 'm': case 'M':
9115     case 'n': case 'N':
9116     case 'o': case 'O':
9117     case 'p': case 'P':
9118     case 'q': case 'Q':
9119     case 'r': case 'R':
9120     case 's': case 'S':
9121     case 't': case 'T':
9122     case 'u': case 'U':
9123               case 'V':
9124     case 'w': case 'W':
9125               case 'X':
9126     case 'y': case 'Y':
9127     case 'z': case 'Z':
9128         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9129             return tok;
9130         goto retry_bufptr;
9131     }
9132 }
9133
9134
9135 /*
9136   yylex
9137
9138   Works out what to call the token just pulled out of the input
9139   stream.  The yacc parser takes care of taking the ops we return and
9140   stitching them into a tree.
9141
9142   Returns:
9143     The type of the next token
9144
9145   Structure:
9146       Check if we have already built the token; if so, use it.
9147       Switch based on the current state:
9148           - if we have a case modifier in a string, deal with that
9149           - handle other cases of interpolation inside a string
9150           - scan the next line if we are inside a format
9151       In the normal state, switch on the next character:
9152           - default:
9153             if alphabetic, go to key lookup
9154             unrecognized character - croak
9155           - 0/4/26: handle end-of-line or EOF
9156           - cases for whitespace
9157           - \n and #: handle comments and line numbers
9158           - various operators, brackets and sigils
9159           - numbers
9160           - quotes
9161           - 'v': vstrings (or go to key lookup)
9162           - 'x' repetition operator (or go to key lookup)
9163           - other ASCII alphanumerics (key lookup begins here):
9164               word before => ?
9165               keyword plugin
9166               scan built-in keyword (but do nothing with it yet)
9167               check for statement label
9168               check for lexical subs
9169                   return yyl_just_a_word if there is one
9170               see whether built-in keyword is overridden
9171               switch on keyword number:
9172                   - default: return yyl_just_a_word:
9173                       not a built-in keyword; handle bareword lookup
9174                       disambiguate between method and sub call
9175                       fall back to bareword
9176                   - cases for built-in keywords
9177 */
9178
9179 #ifdef NETWARE
9180 #define RSFP_FILENO (PL_rsfp)
9181 #else
9182 #define RSFP_FILENO (PerlIO_fileno(PL_rsfp))
9183 #endif
9184
9185
9186 int
9187 Perl_yylex(pTHX)
9188 {
9189     dVAR;
9190     char *s = PL_bufptr;
9191
9192     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9193         const U8* first_bad_char_loc;
9194         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9195                                                         PL_bufend - PL_bufptr,
9196                                                         &first_bad_char_loc)))
9197         {
9198             _force_out_malformed_utf8_message(first_bad_char_loc,
9199                                               (U8 *) PL_bufend,
9200                                               0,
9201                                               1 /* 1 means die */ );
9202             NOT_REACHED; /* NOTREACHED */
9203         }
9204         PL_parser->recheck_utf8_validity = FALSE;
9205     }
9206     DEBUG_T( {
9207         SV* tmp = newSVpvs("");
9208         PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9209             (IV)CopLINE(PL_curcop),
9210             lex_state_names[PL_lex_state],
9211             exp_name[PL_expect],
9212             pv_display(tmp, s, strlen(s), 0, 60));
9213         SvREFCNT_dec(tmp);
9214     } );
9215
9216     /* when we've already built the next token, just pull it out of the queue */
9217     if (PL_nexttoke) {
9218         PL_nexttoke--;
9219         pl_yylval = PL_nextval[PL_nexttoke];
9220         {
9221             I32 next_type;
9222             next_type = PL_nexttype[PL_nexttoke];
9223             if (next_type & (7<<24)) {
9224                 if (next_type & (1<<24)) {
9225                     if (PL_lex_brackets > 100)
9226                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9227                     PL_lex_brackstack[PL_lex_brackets++] =
9228                         (char) ((next_type >> 16) & 0xff);
9229                 }
9230                 if (next_type & (2<<24))
9231                     PL_lex_allbrackets++;
9232                 if (next_type & (4<<24))
9233                     PL_lex_allbrackets--;
9234                 next_type &= 0xffff;
9235             }
9236             return REPORT(next_type == 'p' ? pending_ident() : next_type);
9237         }
9238     }
9239
9240     switch (PL_lex_state) {
9241     case LEX_NORMAL:
9242     case LEX_INTERPNORMAL:
9243         break;
9244
9245     /* interpolated case modifiers like \L \U, including \Q and \E.
9246        when we get here, PL_bufptr is at the \
9247     */
9248     case LEX_INTERPCASEMOD:
9249         /* handle \E or end of string */
9250         return yyl_interpcasemod(aTHX_ s);
9251
9252     case LEX_INTERPPUSH:
9253         return REPORT(sublex_push());
9254
9255     case LEX_INTERPSTART:
9256         if (PL_bufptr == PL_bufend)
9257             return REPORT(sublex_done());
9258         DEBUG_T({
9259             if(*PL_bufptr != '(')
9260                 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9261         });
9262         PL_expect = XTERM;
9263         /* for /@a/, we leave the joining for the regex engine to do
9264          * (unless we're within \Q etc) */
9265         PL_lex_dojoin = (*PL_bufptr == '@'
9266                             && (!PL_lex_inpat || PL_lex_casemods));
9267         PL_lex_state = LEX_INTERPNORMAL;
9268         if (PL_lex_dojoin) {
9269             NEXTVAL_NEXTTOKE.ival = 0;
9270             force_next(',');
9271             force_ident("\"", '$');
9272             NEXTVAL_NEXTTOKE.ival = 0;
9273             force_next('$');
9274             NEXTVAL_NEXTTOKE.ival = 0;
9275             force_next((2<<24)|'(');
9276             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
9277             force_next(FUNC);
9278         }
9279         /* Convert (?{...}) and friends to 'do {...}' */
9280         if (PL_lex_inpat && *PL_bufptr == '(') {
9281             PL_parser->lex_shared->re_eval_start = PL_bufptr;
9282             PL_bufptr += 2;
9283             if (*PL_bufptr != '{')
9284                 PL_bufptr++;
9285             PL_expect = XTERMBLOCK;
9286             force_next(DO);
9287         }
9288
9289         if (PL_lex_starts++) {
9290             s = PL_bufptr;
9291             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9292             if (!PL_lex_casemods && PL_lex_inpat)
9293                 TOKEN(',');
9294             else
9295                 AopNOASSIGN(OP_CONCAT);
9296         }
9297         return yylex();
9298
9299     case LEX_INTERPENDMAYBE:
9300         if (intuit_more(PL_bufptr, PL_bufend)) {
9301             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
9302             break;
9303         }
9304         /* FALLTHROUGH */
9305
9306     case LEX_INTERPEND:
9307         if (PL_lex_dojoin) {
9308             const U8 dojoin_was = PL_lex_dojoin;
9309             PL_lex_dojoin = FALSE;
9310             PL_lex_state = LEX_INTERPCONCAT;
9311             PL_lex_allbrackets--;
9312             return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
9313         }
9314         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9315             && SvEVALED(PL_lex_repl))
9316         {
9317             if (PL_bufptr != PL_bufend)
9318                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9319             PL_lex_repl = NULL;
9320         }
9321         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
9322            re_eval_str.  If the here-doc body’s length equals the previous
9323            value of re_eval_start, re_eval_start will now be null.  So
9324            check re_eval_str as well. */
9325         if (PL_parser->lex_shared->re_eval_start
9326          || PL_parser->lex_shared->re_eval_str) {
9327             SV *sv;
9328             if (*PL_bufptr != ')')
9329                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9330             PL_bufptr++;
9331             /* having compiled a (?{..}) expression, return the original
9332              * text too, as a const */
9333             if (PL_parser->lex_shared->re_eval_str) {
9334                 sv = PL_parser->lex_shared->re_eval_str;
9335                 PL_parser->lex_shared->re_eval_str = NULL;
9336                 SvCUR_set(sv,
9337                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9338                 SvPV_shrink_to_cur(sv);
9339             }
9340             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9341                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9342             NEXTVAL_NEXTTOKE.opval =
9343                     newSVOP(OP_CONST, 0,
9344                                  sv);
9345             force_next(THING);
9346             PL_parser->lex_shared->re_eval_start = NULL;
9347             PL_expect = XTERM;
9348             return REPORT(',');
9349         }
9350
9351         /* FALLTHROUGH */
9352     case LEX_INTERPCONCAT:
9353 #ifdef DEBUGGING
9354         if (PL_lex_brackets)
9355             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9356                        (long) PL_lex_brackets);
9357 #endif
9358         if (PL_bufptr == PL_bufend)
9359             return REPORT(sublex_done());
9360
9361         /* m'foo' still needs to be parsed for possible (?{...}) */
9362         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9363             SV *sv = newSVsv(PL_linestr);
9364             sv = tokeq(sv);
9365             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9366             s = PL_bufend;
9367         }
9368         else {
9369             int save_error_count = PL_error_count;
9370
9371             s = scan_const(PL_bufptr);
9372
9373             /* Set flag if this was a pattern and there were errors.  op.c will
9374              * refuse to compile a pattern with this flag set.  Otherwise, we
9375              * could get segfaults, etc. */
9376             if (PL_lex_inpat && PL_error_count > save_error_count) {
9377                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9378             }
9379             if (*s == '\\')
9380                 PL_lex_state = LEX_INTERPCASEMOD;
9381             else
9382                 PL_lex_state = LEX_INTERPSTART;
9383         }
9384
9385         if (s != PL_bufptr) {
9386             NEXTVAL_NEXTTOKE = pl_yylval;
9387             PL_expect = XTERM;
9388             force_next(THING);
9389             if (PL_lex_starts++) {
9390                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9391                 if (!PL_lex_casemods && PL_lex_inpat)
9392                     TOKEN(',');
9393                 else
9394                     AopNOASSIGN(OP_CONCAT);
9395             }
9396             else {
9397                 PL_bufptr = s;
9398                 return yylex();
9399             }
9400         }
9401
9402         return yylex();
9403     case LEX_FORMLINE:
9404         if (PL_parser->sub_error_count != PL_error_count) {
9405             /* There was an error parsing a formline, which tends to
9406                mess up the parser.
9407                Unlike interpolated sub-parsing, we can't treat any of
9408                these as recoverable, so no need to check sub_no_recover.
9409             */
9410             yyquit();
9411         }
9412         assert(PL_lex_formbrack);
9413         s = scan_formline(PL_bufptr);
9414         if (!PL_lex_formbrack)
9415             return yyl_rightcurly(aTHX_ s, 1);
9416         PL_bufptr = s;
9417         return yylex();
9418     }
9419
9420     /* We really do *not* want PL_linestr ever becoming a COW. */
9421     assert (!SvIsCOW(PL_linestr));
9422     s = PL_bufptr;
9423     PL_oldoldbufptr = PL_oldbufptr;
9424     PL_oldbufptr = s;
9425
9426     if (PL_in_my == KEY_sigvar) {
9427         PL_parser->saw_infix_sigil = 0;
9428         return yyl_sigvar(aTHX_ s);
9429     }
9430
9431     {
9432         /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9433            On its return, we then need to set it to indicate whether the token
9434            we just encountered was an infix operator that (if we hadn't been
9435            expecting an operator) have been a sigil.
9436         */
9437         bool expected_operator = (PL_expect == XOPERATOR);
9438         int ret = yyl_try(aTHX_ s);
9439         switch (pl_yylval.ival) {
9440         case OP_BIT_AND:
9441         case OP_MODULO:
9442         case OP_MULTIPLY:
9443         case OP_NBIT_AND:
9444             if (expected_operator) {
9445                 PL_parser->saw_infix_sigil = 1;
9446                 break;
9447             }
9448             /* FALLTHROUGH */
9449         default:
9450             PL_parser->saw_infix_sigil = 0;
9451         }
9452         return ret;
9453     }
9454 }
9455
9456
9457 /*
9458   S_pending_ident
9459
9460   Looks up an identifier in the pad or in a package
9461
9462   PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9463   rather than a plain pad var.
9464
9465   Returns:
9466     PRIVATEREF if this is a lexical name.
9467     BAREWORD   if this belongs to a package.
9468
9469   Structure:
9470       if we're in a my declaration
9471           croak if they tried to say my($foo::bar)
9472           build the ops for a my() declaration
9473       if it's an access to a my() variable
9474           build ops for access to a my() variable
9475       if in a dq string, and they've said @foo and we can't find @foo
9476           warn
9477       build ops for a bareword
9478 */
9479
9480 static int
9481 S_pending_ident(pTHX)
9482 {
9483     PADOFFSET tmp = 0;
9484     const char pit = (char)pl_yylval.ival;
9485     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9486     /* All routes through this function want to know if there is a colon.  */
9487     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9488
9489     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9490           "### Pending identifier '%s'\n", PL_tokenbuf); });
9491     assert(tokenbuf_len >= 2);
9492
9493     /* if we're in a my(), we can't allow dynamics here.
9494        $foo'bar has already been turned into $foo::bar, so
9495        just check for colons.
9496
9497        if it's a legal name, the OP is a PADANY.
9498     */
9499     if (PL_in_my) {
9500         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
9501             if (has_colon)
9502                 /* diag_listed_as: No package name allowed for variable %s
9503                                    in "our" */
9504                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9505                                   "%s %s in \"our\"",
9506                                   *PL_tokenbuf=='&' ? "subroutine" : "variable",
9507                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9508             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9509         }
9510         else {
9511             OP *o;
9512             if (has_colon) {
9513                 /* "my" variable %s can't be in a package */
9514                 /* PL_no_myglob is constant */
9515                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9516                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9517                             PL_in_my == KEY_my ? "my" : "state",
9518                             *PL_tokenbuf == '&' ? "subroutine" : "variable",
9519                             PL_tokenbuf),
9520                             UTF ? SVf_UTF8 : 0);
9521                 GCC_DIAG_RESTORE_STMT;
9522             }
9523
9524             if (PL_in_my == KEY_sigvar) {
9525                 /* A signature 'padop' needs in addition, an op_first to
9526                  * point to a child sigdefelem, and an extra field to hold
9527                  * the signature index. We can achieve both by using an
9528                  * UNOP_AUX and (ab)using the op_aux field to hold the
9529                  * index. If we ever need more fields, use a real malloced
9530                  * aux strut instead.
9531                  */
9532                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9533                                     INT2PTR(UNOP_AUX_item *,
9534                                         (PL_parser->sig_elems)));
9535                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9536                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9537                                   :                         OPpARGELEM_HV);
9538             }
9539             else
9540                 o = newOP(OP_PADANY, 0);
9541             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9542                                                         UTF ? SVf_UTF8 : 0);
9543             if (PL_in_my == KEY_sigvar)
9544                 PL_in_my = 0;
9545
9546             pl_yylval.opval = o;
9547             return PRIVATEREF;
9548         }
9549     }
9550
9551     /*
9552        build the ops for accesses to a my() variable.
9553     */
9554
9555     if (!has_colon) {
9556         if (!PL_in_my)
9557             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9558                                  0);
9559         if (tmp != NOT_IN_PAD) {
9560             /* might be an "our" variable" */
9561             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9562                 /* build ops for a bareword */
9563                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
9564                 HEK * const stashname = HvNAME_HEK(stash);
9565                 SV *  const sym = newSVhek(stashname);
9566                 sv_catpvs(sym, "::");
9567                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9568                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9569                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9570                 if (pit != '&')
9571                   gv_fetchsv(sym,
9572                     GV_ADDMULTI,
9573                     ((PL_tokenbuf[0] == '$') ? SVt_PV
9574                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9575                      : SVt_PVHV));
9576                 return BAREWORD;
9577             }
9578
9579             pl_yylval.opval = newOP(OP_PADANY, 0);
9580             pl_yylval.opval->op_targ = tmp;
9581             return PRIVATEREF;
9582         }
9583     }
9584
9585     /*
9586        Whine if they've said @foo or @foo{key} in a doublequoted string,
9587        and @foo (or %foo) isn't a variable we can find in the symbol
9588        table.
9589     */
9590     if (ckWARN(WARN_AMBIGUOUS)
9591         && pit == '@'
9592         && PL_lex_state != LEX_NORMAL
9593         && !PL_lex_brackets)
9594     {
9595         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9596                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9597                                          SVt_PVAV);
9598         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9599            )
9600         {
9601             /* Downgraded from fatal to warning 20000522 mjd */
9602             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9603                         "Possible unintended interpolation of %" UTF8f
9604                         " in string",
9605                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9606         }
9607     }
9608
9609     /* build ops for a bareword */
9610     pl_yylval.opval = newSVOP(OP_CONST, 0,
9611                                    newSVpvn_flags(PL_tokenbuf + 1,
9612                                                       tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9613                                                       UTF ? SVf_UTF8 : 0 ));
9614     pl_yylval.opval->op_private = OPpCONST_ENTERED;
9615     if (pit != '&')
9616         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9617                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9618                      | ( UTF ? SVf_UTF8 : 0 ),
9619                      ((PL_tokenbuf[0] == '$') ? SVt_PV
9620                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9621                       : SVt_PVHV));
9622     return BAREWORD;
9623 }
9624
9625 STATIC void
9626 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9627 {
9628     PERL_ARGS_ASSERT_CHECKCOMMA;
9629
9630     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
9631         if (ckWARN(WARN_SYNTAX)) {
9632             int level = 1;
9633             const char *w;
9634             for (w = s+2; *w && level; w++) {
9635                 if (*w == '(')
9636                     ++level;
9637                 else if (*w == ')')
9638                     --level;
9639             }
9640             while (isSPACE(*w))
9641                 ++w;
9642             /* the list of chars below is for end of statements or
9643              * block / parens, boolean operators (&&, ||, //) and branch
9644              * constructs (or, and, if, until, unless, while, err, for).
9645              * Not a very solid hack... */
9646             if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9647                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9648                             "%s (...) interpreted as function",name);
9649         }
9650     }
9651     while (s < PL_bufend && isSPACE(*s))
9652         s++;
9653     if (*s == '(')
9654         s++;
9655     while (s < PL_bufend && isSPACE(*s))
9656         s++;
9657     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9658         const char * const w = s;
9659         s += UTF ? UTF8SKIP(s) : 1;
9660         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9661             s += UTF ? UTF8SKIP(s) : 1;
9662         while (s < PL_bufend && isSPACE(*s))
9663             s++;
9664         if (*s == ',') {
9665             GV* gv;
9666             if (keyword(w, s - w, 0))
9667                 return;
9668
9669             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9670             if (gv && GvCVu(gv))
9671                 return;
9672             if (s - w <= 254) {
9673                 PADOFFSET off;
9674                 char tmpbuf[256];
9675                 Copy(w, tmpbuf+1, s - w, char);
9676                 *tmpbuf = '&';
9677                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9678                 if (off != NOT_IN_PAD) return;
9679             }
9680             Perl_croak(aTHX_ "No comma allowed after %s", what);
9681         }
9682     }
9683 }
9684
9685 /* S_new_constant(): do any overload::constant lookup.
9686
9687    Either returns sv, or mortalizes/frees sv and returns a new SV*.
9688    Best used as sv=new_constant(..., sv, ...).
9689    If s, pv are NULL, calls subroutine with one argument,
9690    and <type> is used with error messages only.
9691    <type> is assumed to be well formed UTF-8.
9692
9693    If error_msg is not NULL, *error_msg will be set to any error encountered.
9694    Otherwise yyerror() will be used to output it */
9695
9696 STATIC SV *
9697 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9698                SV *sv, SV *pv, const char *type, STRLEN typelen,
9699                const char ** error_msg)
9700 {
9701     dSP;
9702     HV * table = GvHV(PL_hintgv);                /* ^H */
9703     SV *res;
9704     SV *errsv = NULL;
9705     SV **cvp;
9706     SV *cv, *typesv;
9707     const char *why1 = "", *why2 = "", *why3 = "";
9708     const char * optional_colon = ":";  /* Only some messages have a colon */
9709     char *msg;
9710
9711     PERL_ARGS_ASSERT_NEW_CONSTANT;
9712     /* We assume that this is true: */
9713     assert(type || s);
9714
9715     sv_2mortal(sv);                     /* Parent created it permanently */
9716
9717     if (   ! table
9718         || ! (PL_hints & HINT_LOCALIZE_HH))
9719     {
9720         why1 = "unknown";
9721         optional_colon = "";
9722         goto report;
9723     }
9724
9725     cvp = hv_fetch(table, key, keylen, FALSE);
9726     if (!cvp || !SvOK(*cvp)) {
9727         why1 = "$^H{";
9728         why2 = key;
9729         why3 = "} is not defined";
9730         goto report;
9731     }
9732
9733     cv = *cvp;
9734     if (!pv && s)
9735         pv = newSVpvn_flags(s, len, SVs_TEMP);
9736     if (type && pv)
9737         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9738     else
9739         typesv = &PL_sv_undef;
9740
9741     PUSHSTACKi(PERLSI_OVERLOAD);
9742     ENTER ;
9743     SAVETMPS;
9744
9745     PUSHMARK(SP) ;
9746     EXTEND(sp, 3);
9747     if (pv)
9748         PUSHs(pv);
9749     PUSHs(sv);
9750     if (pv)
9751         PUSHs(typesv);
9752     PUTBACK;
9753     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9754
9755     SPAGAIN ;
9756
9757     /* Check the eval first */
9758     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9759         STRLEN errlen;
9760         const char * errstr;
9761         sv_catpvs(errsv, "Propagated");
9762         errstr = SvPV_const(errsv, errlen);
9763         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9764         (void)POPs;
9765         res = SvREFCNT_inc_simple_NN(sv);
9766     }
9767     else {
9768         res = POPs;
9769         SvREFCNT_inc_simple_void_NN(res);
9770     }
9771
9772     PUTBACK ;
9773     FREETMPS ;
9774     LEAVE ;
9775     POPSTACK;
9776
9777     if (SvOK(res)) {
9778         return res;
9779     }
9780
9781     sv = res;
9782     (void)sv_2mortal(sv);
9783
9784     why1 = "Call to &{$^H{";
9785     why2 = key;
9786     why3 = "}} did not return a defined value";
9787
9788   report:
9789
9790     msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9791                         (int)(type ? typelen : len),
9792                         (type ? type: s),
9793                         optional_colon,
9794                         why1, why2, why3);
9795     if (error_msg) {
9796         *error_msg = msg;
9797     }
9798     else {
9799         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9800     }
9801     return SvREFCNT_inc_simple_NN(sv);
9802 }
9803
9804 PERL_STATIC_INLINE void
9805 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9806                     bool is_utf8, bool check_dollar, bool tick_warn)
9807 {
9808     int saw_tick = 0;
9809     const char *olds = *s;
9810     PERL_ARGS_ASSERT_PARSE_IDENT;
9811
9812     while (*s < PL_bufend) {
9813         if (*d >= e)
9814             Perl_croak(aTHX_ "%s", ident_too_long);
9815         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9816              /* The UTF-8 case must come first, otherwise things
9817              * like c\N{COMBINING TILDE} would start failing, as the
9818              * isWORDCHAR_A case below would gobble the 'c' up.
9819              */
9820
9821             char *t = *s + UTF8SKIP(*s);
9822             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9823                 t += UTF8SKIP(t);
9824             }
9825             if (*d + (t - *s) > e)
9826                 Perl_croak(aTHX_ "%s", ident_too_long);
9827             Copy(*s, *d, t - *s, char);
9828             *d += t - *s;
9829             *s = t;
9830         }
9831         else if ( isWORDCHAR_A(**s) ) {
9832             do {
9833                 *(*d)++ = *(*s)++;
9834             } while (isWORDCHAR_A(**s) && *d < e);
9835         }
9836         else if (   allow_package
9837                  && **s == '\''
9838                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9839         {
9840             *(*d)++ = ':';
9841             *(*d)++ = ':';
9842             (*s)++;
9843             saw_tick++;
9844         }
9845         else if (allow_package && **s == ':' && (*s)[1] == ':'
9846            /* Disallow things like Foo::$bar. For the curious, this is
9847             * the code path that triggers the "Bad name after" warning
9848             * when looking for barewords.
9849             */
9850            && !(check_dollar && (*s)[2] == '$')) {
9851             *(*d)++ = *(*s)++;
9852             *(*d)++ = *(*s)++;
9853         }
9854         else
9855             break;
9856     }
9857     if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9858               && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9859         char *this_d;
9860         char *d2;
9861         Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9862         d2 = this_d;
9863         SAVEFREEPV(this_d);
9864         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9865                          "Old package separator used in string");
9866         if (olds[-1] == '#')
9867             *d2++ = olds[-2];
9868         *d2++ = olds[-1];
9869         while (olds < *s) {
9870             if (*olds == '\'') {
9871                 *d2++ = '\\';
9872                 *d2++ = *olds++;
9873             }
9874             else
9875                 *d2++ = *olds++;
9876         }
9877         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9878                          "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9879                           UTF8fARG(is_utf8, d2-this_d, this_d));
9880     }
9881     return;
9882 }
9883
9884 /* Returns a NUL terminated string, with the length of the string written to
9885    *slp
9886    */
9887 char *
9888 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9889 {
9890     char *d = dest;
9891     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
9892     bool is_utf8 = cBOOL(UTF);
9893
9894     PERL_ARGS_ASSERT_SCAN_WORD;
9895
9896     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9897     *d = '\0';
9898     *slp = d - dest;
9899     return s;
9900 }
9901
9902 /* Is the byte 'd' a legal single character identifier name?  'u' is true
9903  * iff Unicode semantics are to be used.  The legal ones are any of:
9904  *  a) all ASCII characters except:
9905  *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9906  *          2) '{'
9907  *     The final case currently doesn't get this far in the program, so we
9908  *     don't test for it.  If that were to change, it would be ok to allow it.
9909  *  b) When not under Unicode rules, any upper Latin1 character
9910  *  c) Otherwise, when unicode rules are used, all XIDS characters.
9911  *
9912  *      Because all ASCII characters have the same representation whether
9913  *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9914  *      '{' without knowing if is UTF-8 or not. */
9915 #define VALID_LEN_ONE_IDENT(s, e, is_utf8)                                  \
9916     (isGRAPH_A(*(s)) || ((is_utf8)                                          \
9917                          ? isIDFIRST_utf8_safe(s, e)                        \
9918                          : (isGRAPH_L1(*s)                                  \
9919                             && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9920
9921 STATIC char *
9922 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9923 {
9924     I32 herelines = PL_parser->herelines;
9925     SSize_t bracket = -1;
9926     char funny = *s++;
9927     char *d = dest;
9928     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
9929     bool is_utf8 = cBOOL(UTF);
9930     I32 orig_copline = 0, tmp_copline = 0;
9931
9932     PERL_ARGS_ASSERT_SCAN_IDENT;
9933
9934     if (isSPACE(*s) || !*s)
9935         s = skipspace(s);
9936     if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
9937         bool is_zero= *s == '0' ? TRUE : FALSE;
9938         char *digit_start= d;
9939         *d++ = *s++;
9940         while (s < PL_bufend && isDIGIT(*s)) {
9941             if (d >= e)
9942                 Perl_croak(aTHX_ "%s", ident_too_long);
9943             *d++ = *s++;
9944         }
9945         if (is_zero && d - digit_start > 1)
9946             Perl_croak(aTHX_ ident_var_zero_multi_digit);
9947     }
9948     else {  /* See if it is a "normal" identifier */
9949         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9950     }
9951     *d = '\0';
9952     d = dest;
9953     if (*d) {
9954         /* Either a digit variable, or parse_ident() found an identifier
9955            (anything valid as a bareword), so job done and return.  */
9956         if (PL_lex_state != LEX_NORMAL)
9957             PL_lex_state = LEX_INTERPENDMAYBE;
9958         return s;
9959     }
9960
9961     /* Here, it is not a run-of-the-mill identifier name */
9962
9963     if (*s == '$' && s[1]
9964         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9965             || isDIGIT_A((U8)s[1])
9966             || s[1] == '$'
9967             || s[1] == '{'
9968             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9969     {
9970         /* Dereferencing a value in a scalar variable.
9971            The alternatives are different syntaxes for a scalar variable.
9972            Using ' as a leading package separator isn't allowed. :: is.   */
9973         return s;
9974     }
9975     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
9976     if (*s == '{') {
9977         bracket = s - SvPVX(PL_linestr);
9978         s++;
9979         orig_copline = CopLINE(PL_curcop);
9980         if (s < PL_bufend && isSPACE(*s)) {
9981             s = skipspace(s);
9982         }
9983     }
9984     if ((s <= PL_bufend - ((is_utf8)
9985                           ? UTF8SKIP(s)
9986                           : 1))
9987         && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9988     {
9989         if (is_utf8) {
9990             const STRLEN skip = UTF8SKIP(s);
9991             STRLEN i;
9992             d[skip] = '\0';
9993             for ( i = 0; i < skip; i++ )
9994                 d[i] = *s++;
9995         }
9996         else {
9997             *d = *s++;
9998             /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
9999             if (isDIGIT(*d)) {
10000                 bool is_zero= *d == '0' ? TRUE : FALSE;
10001                 char *digit_start= d;
10002                 while (s < PL_bufend && isDIGIT(*s)) {
10003                     d++;
10004                     if (d >= e)
10005                         Perl_croak(aTHX_ "%s", ident_too_long);
10006                     *d= *s++;
10007                 }
10008                 if (is_zero && d - digit_start > 1)
10009                     Perl_croak(aTHX_ ident_var_zero_multi_digit);
10010             }
10011             d[1] = '\0';
10012         }
10013     }
10014     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10015     if (*d == '^' && *s && isCONTROLVAR(*s)) {
10016         *d = toCTRL(*s);
10017         s++;
10018     }
10019     /* Warn about ambiguous code after unary operators if {...} notation isn't
10020        used.  There's no difference in ambiguity; it's merely a heuristic
10021        about when not to warn.  */
10022     else if (ck_uni && bracket == -1)
10023         check_uni();
10024     if (bracket != -1) {
10025         bool skip;
10026         char *s2;
10027         /* If we were processing {...} notation then...  */
10028         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10029             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10030                  && isWORDCHAR(*s))
10031         ) {
10032             /* note we have to check for a normal identifier first,
10033              * as it handles utf8 symbols, and only after that has
10034              * been ruled out can we look at the caret words */
10035             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10036                 /* if it starts as a valid identifier, assume that it is one.
10037                    (the later check for } being at the expected point will trap
10038                    cases where this doesn't pan out.)  */
10039                 d += is_utf8 ? UTF8SKIP(d) : 1;
10040                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10041                 *d = '\0';
10042             }
10043             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10044                 d++;
10045                 while (isWORDCHAR(*s) && d < e) {
10046                     *d++ = *s++;
10047                 }
10048                 if (d >= e)
10049                     Perl_croak(aTHX_ "%s", ident_too_long);
10050                 *d = '\0';
10051             }
10052             tmp_copline = CopLINE(PL_curcop);
10053             if (s < PL_bufend && isSPACE(*s)) {
10054                 s = skipspace(s);
10055             }
10056             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10057                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
10058                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10059                     const char * const brack =
10060                         (const char *)
10061                         ((*s == '[') ? "[...]" : "{...}");
10062                     orig_copline = CopLINE(PL_curcop);
10063                     CopLINE_set(PL_curcop, tmp_copline);
10064    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10065                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10066                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10067                         funny, dest, brack, funny, dest, brack);
10068                     CopLINE_set(PL_curcop, orig_copline);
10069                 }
10070                 bracket++;
10071                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10072                 PL_lex_allbrackets++;
10073                 return s;
10074             }
10075         }
10076
10077         if ( !tmp_copline )
10078             tmp_copline = CopLINE(PL_curcop);
10079         if ((skip = s < PL_bufend && isSPACE(*s))) {
10080             /* Avoid incrementing line numbers or resetting PL_linestart,
10081                in case we have to back up.  */
10082             STRLEN s_off = s - SvPVX(PL_linestr);
10083             s2 = peekspace(s);
10084             s = SvPVX(PL_linestr) + s_off;
10085         }
10086         else
10087             s2 = s;
10088
10089         /* Expect to find a closing } after consuming any trailing whitespace.
10090          */
10091         if (*s2 == '}') {
10092             /* Now increment line numbers if applicable.  */
10093             if (skip)
10094                 s = skipspace(s);
10095             s++;
10096             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10097                 PL_lex_state = LEX_INTERPEND;
10098                 PL_expect = XREF;
10099             }
10100             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10101                 if (ckWARN(WARN_AMBIGUOUS)
10102                     && (keyword(dest, d - dest, 0)
10103                         || get_cvn_flags(dest, d - dest, is_utf8
10104                            ? SVf_UTF8
10105                            : 0)))
10106                 {
10107                     SV *tmp = newSVpvn_flags( dest, d - dest,
10108                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10109                     if (funny == '#')
10110                         funny = '@';
10111                     orig_copline = CopLINE(PL_curcop);
10112                     CopLINE_set(PL_curcop, tmp_copline);
10113                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10114                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10115                         funny, SVfARG(tmp), funny, SVfARG(tmp));
10116                     CopLINE_set(PL_curcop, orig_copline);
10117                 }
10118             }
10119         }
10120         else {
10121             /* Didn't find the closing } at the point we expected, so restore
10122                state such that the next thing to process is the opening { and */
10123             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10124             CopLINE_set(PL_curcop, orig_copline);
10125             PL_parser->herelines = herelines;
10126             *dest = '\0';
10127             PL_parser->sub_no_recover = TRUE;
10128         }
10129     }
10130     else if (   PL_lex_state == LEX_INTERPNORMAL
10131              && !PL_lex_brackets
10132              && !intuit_more(s, PL_bufend))
10133         PL_lex_state = LEX_INTERPEND;
10134     return s;
10135 }
10136
10137 static bool
10138 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10139
10140     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10141      * found in the parse starting at 's', based on the subset that are valid
10142      * in this context input to this routine in 'valid_flags'. Advances s.
10143      * Returns TRUE if the input should be treated as a valid flag, so the next
10144      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10145      * upon first call on the current regex.  This routine will set it to any
10146      * charset modifier found.  The caller shouldn't change it.  This way,
10147      * another charset modifier encountered in the parse can be detected as an
10148      * error, as we have decided to allow only one */
10149
10150     const char c = **s;
10151     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10152
10153     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10154         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10155             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10156                        UTF ? SVf_UTF8 : 0);
10157             (*s) += charlen;
10158             /* Pretend that it worked, so will continue processing before
10159              * dieing */
10160             return TRUE;
10161         }
10162         return FALSE;
10163     }
10164
10165     switch (c) {
10166
10167         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10168         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
10169         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
10170         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
10171         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
10172         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10173         case LOCALE_PAT_MOD:
10174             if (*charset) {
10175                 goto multiple_charsets;
10176             }
10177             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10178             *charset = c;
10179             break;
10180         case UNICODE_PAT_MOD:
10181             if (*charset) {
10182                 goto multiple_charsets;
10183             }
10184             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10185             *charset = c;
10186             break;
10187         case ASCII_RESTRICT_PAT_MOD:
10188             if (! *charset) {
10189                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10190             }
10191             else {
10192
10193                 /* Error if previous modifier wasn't an 'a', but if it was, see
10194                  * if, and accept, a second occurrence (only) */
10195                 if (*charset != 'a'
10196                     || get_regex_charset(*pmfl)
10197                         != REGEX_ASCII_RESTRICTED_CHARSET)
10198                 {
10199                         goto multiple_charsets;
10200                 }
10201                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10202             }
10203             *charset = c;
10204             break;
10205         case DEPENDS_PAT_MOD:
10206             if (*charset) {
10207                 goto multiple_charsets;
10208             }
10209             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10210             *charset = c;
10211             break;
10212     }
10213
10214     (*s)++;
10215     return TRUE;
10216
10217     multiple_charsets:
10218         if (*charset != c) {
10219             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10220         }
10221         else if (c == 'a') {
10222   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10223             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10224         }
10225         else {
10226             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10227         }
10228
10229         /* Pretend that it worked, so will continue processing before dieing */
10230         (*s)++;
10231         return TRUE;
10232 }
10233
10234 STATIC char *
10235 S_scan_pat(pTHX_ char *start, I32 type)
10236 {
10237     PMOP *pm;
10238     char *s;
10239     const char * const valid_flags =
10240         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10241     char charset = '\0';    /* character set modifier */
10242     unsigned int x_mod_count = 0;
10243
10244     PERL_ARGS_ASSERT_SCAN_PAT;
10245
10246     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10247     if (!s)
10248         Perl_croak(aTHX_ "Search pattern not terminated");
10249
10250     pm = (PMOP*)newPMOP(type, 0);
10251     if (PL_multi_open == '?') {
10252         /* This is the only point in the code that sets PMf_ONCE:  */
10253         pm->op_pmflags |= PMf_ONCE;
10254
10255         /* Hence it's safe to do this bit of PMOP book-keeping here, which
10256            allows us to restrict the list needed by reset to just the ??
10257            matches.  */
10258         assert(type != OP_TRANS);
10259         if (PL_curstash) {
10260             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10261             U32 elements;
10262             if (!mg) {
10263                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10264                                  0);
10265             }
10266             elements = mg->mg_len / sizeof(PMOP**);
10267             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10268             ((PMOP**)mg->mg_ptr) [elements++] = pm;
10269             mg->mg_len = elements * sizeof(PMOP**);
10270             PmopSTASH_set(pm,PL_curstash);
10271         }
10272     }
10273
10274     /* if qr/...(?{..}).../, then need to parse the pattern within a new
10275      * anon CV. False positives like qr/[(?{]/ are harmless */
10276
10277     if (type == OP_QR) {
10278         STRLEN len;
10279         char *e, *p = SvPV(PL_lex_stuff, len);
10280         e = p + len;
10281         for (; p < e; p++) {
10282             if (p[0] == '(' && p[1] == '?'
10283                 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10284             {
10285                 pm->op_pmflags |= PMf_HAS_CV;
10286                 break;
10287             }
10288         }
10289         pm->op_pmflags |= PMf_IS_QR;
10290     }
10291
10292     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10293                                 &s, &charset, &x_mod_count))
10294     {};
10295     /* issue a warning if /c is specified,but /g is not */
10296     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10297     {
10298         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10299                        "Use of /c modifier is meaningless without /g" );
10300     }
10301
10302     PL_lex_op = (OP*)pm;
10303     pl_yylval.ival = OP_MATCH;
10304     return s;
10305 }
10306
10307 STATIC char *
10308 S_scan_subst(pTHX_ char *start)
10309 {
10310     char *s;
10311     PMOP *pm;
10312     I32 first_start;
10313     line_t first_line;
10314     line_t linediff = 0;
10315     I32 es = 0;
10316     char charset = '\0';    /* character set modifier */
10317     unsigned int x_mod_count = 0;
10318     char *t;
10319
10320     PERL_ARGS_ASSERT_SCAN_SUBST;
10321
10322     pl_yylval.ival = OP_NULL;
10323
10324     s = scan_str(start, TRUE, FALSE, FALSE, &t);
10325
10326     if (!s)
10327         Perl_croak(aTHX_ "Substitution pattern not terminated");
10328
10329     s = t;
10330
10331     first_start = PL_multi_start;
10332     first_line = CopLINE(PL_curcop);
10333     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10334     if (!s) {
10335         SvREFCNT_dec_NN(PL_lex_stuff);
10336         PL_lex_stuff = NULL;
10337         Perl_croak(aTHX_ "Substitution replacement not terminated");
10338     }
10339     PL_multi_start = first_start;       /* so whole substitution is taken together */
10340
10341     pm = (PMOP*)newPMOP(OP_SUBST, 0);
10342
10343
10344     while (*s) {
10345         if (*s == EXEC_PAT_MOD) {
10346             s++;
10347             es++;
10348         }
10349         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10350                                   &s, &charset, &x_mod_count))
10351         {
10352             break;
10353         }
10354     }
10355
10356     if ((pm->op_pmflags & PMf_CONTINUE)) {
10357         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10358     }
10359
10360     if (es) {
10361         SV * const repl = newSVpvs("");
10362
10363         PL_multi_end = 0;
10364         pm->op_pmflags |= PMf_EVAL;
10365         for (; es > 1; es--) {
10366             sv_catpvs(repl, "eval ");
10367         }
10368         sv_catpvs(repl, "do {");
10369         sv_catsv(repl, PL_parser->lex_sub_repl);
10370         sv_catpvs(repl, "}");
10371         SvREFCNT_dec(PL_parser->lex_sub_repl);
10372         PL_parser->lex_sub_repl = repl;
10373     }
10374
10375
10376     linediff = CopLINE(PL_curcop) - first_line;
10377     if (linediff)
10378         CopLINE_set(PL_curcop, first_line);
10379
10380     if (linediff || es) {
10381         /* the IVX field indicates that the replacement string is a s///e;
10382          * the NVX field indicates how many src code lines the replacement
10383          * spreads over */
10384         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10385         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10386         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10387                                                                     cBOOL(es);
10388     }
10389
10390     PL_lex_op = (OP*)pm;
10391     pl_yylval.ival = OP_SUBST;
10392     return s;
10393 }
10394
10395 STATIC char *
10396 S_scan_trans(pTHX_ char *start)
10397 {
10398     char* s;
10399     OP *o;
10400     U8 squash;
10401     U8 del;
10402     U8 complement;
10403     bool nondestruct = 0;
10404     char *t;
10405
10406     PERL_ARGS_ASSERT_SCAN_TRANS;
10407
10408     pl_yylval.ival = OP_NULL;
10409
10410     s = scan_str(start,FALSE,FALSE,FALSE,&t);
10411     if (!s)
10412         Perl_croak(aTHX_ "Transliteration pattern not terminated");
10413
10414     s = t;
10415
10416     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10417     if (!s) {
10418         SvREFCNT_dec_NN(PL_lex_stuff);
10419         PL_lex_stuff = NULL;
10420         Perl_croak(aTHX_ "Transliteration replacement not terminated");
10421     }
10422
10423     complement = del = squash = 0;
10424     while (1) {
10425         switch (*s) {
10426         case 'c':
10427             complement = OPpTRANS_COMPLEMENT;
10428             break;
10429         case 'd':
10430             del = OPpTRANS_DELETE;
10431             break;
10432         case 's':
10433             squash = OPpTRANS_SQUASH;
10434             break;
10435         case 'r':
10436             nondestruct = 1;
10437             break;
10438         default:
10439             goto no_more;
10440         }
10441         s++;
10442     }
10443   no_more:
10444
10445     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10446     o->op_private &= ~OPpTRANS_ALL;
10447     o->op_private |= del|squash|complement;
10448
10449     PL_lex_op = o;
10450     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10451
10452
10453     return s;
10454 }
10455
10456 /* scan_heredoc
10457    Takes a pointer to the first < in <<FOO.
10458    Returns a pointer to the byte following <<FOO.
10459
10460    This function scans a heredoc, which involves different methods
10461    depending on whether we are in a string eval, quoted construct, etc.
10462    This is because PL_linestr could containing a single line of input, or
10463    a whole string being evalled, or the contents of the current quote-
10464    like operator.
10465
10466    The two basic methods are:
10467     - Steal lines from the input stream
10468     - Scan the heredoc in PL_linestr and remove it therefrom
10469
10470    In a file scope or filtered eval, the first method is used; in a
10471    string eval, the second.
10472
10473    In a quote-like operator, we have to choose between the two,
10474    depending on where we can find a newline.  We peek into outer lex-
10475    ing scopes until we find one with a newline in it.  If we reach the
10476    outermost lexing scope and it is a file, we use the stream method.
10477    Otherwise it is treated as an eval.
10478 */
10479
10480 STATIC char *
10481 S_scan_heredoc(pTHX_ char *s)
10482 {
10483     I32 op_type = OP_SCALAR;
10484     I32 len;
10485     SV *tmpstr;
10486     char term;
10487     char *d;
10488     char *e;
10489     char *peek;
10490     char *indent = 0;
10491     I32 indent_len = 0;
10492     bool indented = FALSE;
10493     const bool infile = PL_rsfp || PL_parser->filtered;
10494     const line_t origline = CopLINE(PL_curcop);
10495     LEXSHARED *shared = PL_parser->lex_shared;
10496
10497     PERL_ARGS_ASSERT_SCAN_HEREDOC;
10498
10499     s += 2;
10500     d = PL_tokenbuf + 1;
10501     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10502     *PL_tokenbuf = '\n';
10503     peek = s;
10504
10505     if (*peek == '~') {
10506         indented = TRUE;
10507         peek++; s++;
10508     }
10509
10510     while (SPACE_OR_TAB(*peek))
10511         peek++;
10512
10513     if (*peek == '`' || *peek == '\'' || *peek =='"') {
10514         s = peek;
10515         term = *s++;
10516         s = delimcpy(d, e, s, PL_bufend, term, &len);
10517         if (s == PL_bufend)
10518             Perl_croak(aTHX_ "Unterminated delimiter for here document");
10519         d += len;
10520         s++;
10521     }
10522     else {
10523         if (*s == '\\')
10524             /* <<\FOO is equivalent to <<'FOO' */
10525             s++, term = '\'';
10526         else
10527             term = '"';
10528
10529         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10530             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10531
10532         peek = s;
10533
10534         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10535             peek += UTF ? UTF8SKIP(peek) : 1;
10536         }
10537
10538         len = (peek - s >= e - d) ? (e - d) : (peek - s);
10539         Copy(s, d, len, char);
10540         s += len;
10541         d += len;
10542     }
10543
10544     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10545         Perl_croak(aTHX_ "Delimiter for here document is too long");
10546
10547     *d++ = '\n';
10548     *d = '\0';
10549     len = d - PL_tokenbuf;
10550
10551 #ifndef PERL_STRICT_CR
10552     d = (char *) memchr(s, '\r', PL_bufend - s);
10553     if (d) {
10554         char * const olds = s;
10555         s = d;
10556         while (s < PL_bufend) {
10557             if (*s == '\r') {
10558                 *d++ = '\n';
10559                 if (*++s == '\n')
10560                     s++;
10561             }
10562             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
10563                 *d++ = *s++;
10564                 s++;
10565             }
10566             else
10567                 *d++ = *s++;
10568         }
10569         *d = '\0';
10570         PL_bufend = d;
10571         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10572         s = olds;
10573     }
10574 #endif
10575
10576     tmpstr = newSV_type(SVt_PVIV);
10577     SvGROW(tmpstr, 80);
10578     if (term == '\'') {
10579         op_type = OP_CONST;
10580         SvIV_set(tmpstr, -1);
10581     }
10582     else if (term == '`') {
10583         op_type = OP_BACKTICK;
10584         SvIV_set(tmpstr, '\\');
10585     }
10586
10587     PL_multi_start = origline + 1 + PL_parser->herelines;
10588     PL_multi_open = PL_multi_close = '<';
10589
10590     /* inside a string eval or quote-like operator */
10591     if (!infile || PL_lex_inwhat) {
10592         SV *linestr;
10593         char *bufend;
10594         char * const olds = s;
10595         PERL_CONTEXT * const cx = CX_CUR();
10596         /* These two fields are not set until an inner lexing scope is
10597            entered.  But we need them set here. */
10598         shared->ls_bufptr  = s;
10599         shared->ls_linestr = PL_linestr;
10600
10601         if (PL_lex_inwhat) {
10602             /* Look for a newline.  If the current buffer does not have one,
10603              peek into the line buffer of the parent lexing scope, going
10604              up as many levels as necessary to find one with a newline
10605              after bufptr.
10606             */
10607             while (!(s = (char *)memchr(
10608                                 (void *)shared->ls_bufptr, '\n',
10609                                 SvEND(shared->ls_linestr)-shared->ls_bufptr
10610                 )))
10611             {
10612                 shared = shared->ls_prev;
10613                 /* shared is only null if we have gone beyond the outermost
10614                    lexing scope.  In a file, we will have broken out of the
10615                    loop in the previous iteration.  In an eval, the string buf-
10616                    fer ends with "\n;", so the while condition above will have
10617                    evaluated to false.  So shared can never be null.  Or so you
10618                    might think.  Odd syntax errors like s;@{<<; can gobble up
10619                    the implicit semicolon at the end of a flie, causing the
10620                    file handle to be closed even when we are not in a string
10621                    eval.  So shared may be null in that case.
10622                    (Closing '>>}' here to balance the earlier open brace for
10623                    editors that look for matched pairs.) */
10624                 if (UNLIKELY(!shared))
10625                     goto interminable;
10626                 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10627                    most lexing scope.  In a file, shared->ls_linestr at that
10628                    level is just one line, so there is no body to steal. */
10629                 if (infile && !shared->ls_prev) {
10630                     s = olds;
10631                     goto streaming;
10632                 }
10633             }
10634         }
10635         else {  /* eval or we've already hit EOF */
10636             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10637             if (!s)
10638                 goto interminable;
10639         }
10640
10641         linestr = shared->ls_linestr;
10642         bufend = SvEND(linestr);
10643         d = s;
10644         if (indented) {
10645             char *myolds = s;
10646
10647             while (s < bufend - len + 1) {
10648                 if (*s++ == '\n')
10649                     ++PL_parser->herelines;
10650
10651                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10652                     char *backup = s;
10653                     indent_len = 0;
10654
10655                     /* Only valid if it's preceded by whitespace only */
10656                     while (backup != myolds && --backup >= myolds) {
10657                         if (! SPACE_OR_TAB(*backup)) {
10658                             break;
10659                         }
10660                         indent_len++;
10661                     }
10662
10663                     /* No whitespace or all! */
10664                     if (backup == s || *backup == '\n') {
10665                         Newx(indent, indent_len + 1, char);
10666                         memcpy(indent, backup + 1, indent_len);
10667                         indent[indent_len] = 0;
10668                         s--; /* before our delimiter */
10669                         PL_parser->herelines--; /* this line doesn't count */
10670                         break;
10671                     }
10672                 }
10673             }
10674         }
10675         else {
10676             while (s < bufend - len + 1
10677                    && memNE(s,PL_tokenbuf,len) )
10678             {
10679                 if (*s++ == '\n')
10680                     ++PL_parser->herelines;
10681             }
10682         }
10683
10684         if (s >= bufend - len + 1) {
10685             goto interminable;
10686         }
10687
10688         sv_setpvn(tmpstr,d+1,s-d);
10689         s += len - 1;
10690         /* the preceding stmt passes a newline */
10691         PL_parser->herelines++;
10692
10693         /* s now points to the newline after the heredoc terminator.
10694            d points to the newline before the body of the heredoc.
10695          */
10696
10697         /* We are going to modify linestr in place here, so set
10698            aside copies of the string if necessary for re-evals or
10699            (caller $n)[6]. */
10700         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10701            check shared->re_eval_str. */
10702         if (shared->re_eval_start || shared->re_eval_str) {
10703             /* Set aside the rest of the regexp */
10704             if (!shared->re_eval_str)
10705                 shared->re_eval_str =
10706                        newSVpvn(shared->re_eval_start,
10707                                 bufend - shared->re_eval_start);
10708             shared->re_eval_start -= s-d;
10709         }
10710
10711         if (cxstack_ix >= 0
10712             && CxTYPE(cx) == CXt_EVAL
10713             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10714             && cx->blk_eval.cur_text == linestr)
10715         {
10716             cx->blk_eval.cur_text = newSVsv(linestr);
10717             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10718         }
10719
10720         /* Copy everything from s onwards back to d. */
10721         Move(s,d,bufend-s + 1,char);
10722         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10723         /* Setting PL_bufend only applies when we have not dug deeper
10724            into other scopes, because sublex_done sets PL_bufend to
10725            SvEND(PL_linestr). */
10726         if (shared == PL_parser->lex_shared)
10727             PL_bufend = SvEND(linestr);
10728         s = olds;
10729     }
10730     else {
10731         SV *linestr_save;
10732         char *oldbufptr_save;
10733         char *oldoldbufptr_save;
10734       streaming:
10735         SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
10736         term = PL_tokenbuf[1];
10737         len--;
10738         linestr_save = PL_linestr; /* must restore this afterwards */
10739         d = s;                   /* and this */
10740         oldbufptr_save = PL_oldbufptr;
10741         oldoldbufptr_save = PL_oldoldbufptr;
10742         PL_linestr = newSVpvs("");
10743         PL_bufend = SvPVX(PL_linestr);
10744
10745         while (1) {
10746             PL_bufptr = PL_bufend;
10747             CopLINE_set(PL_curcop,
10748                         origline + 1 + PL_parser->herelines);
10749
10750             if (   !lex_next_chunk(LEX_NO_TERM)
10751                 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10752             {
10753                 /* Simply freeing linestr_save might seem simpler here, as it
10754                    does not matter what PL_linestr points to, since we are
10755                    about to croak; but in a quote-like op, linestr_save
10756                    will have been prospectively freed already, via
10757                    SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10758                    restore PL_linestr. */
10759                 SvREFCNT_dec_NN(PL_linestr);
10760                 PL_linestr = linestr_save;
10761                 PL_oldbufptr = oldbufptr_save;
10762                 PL_oldoldbufptr = oldoldbufptr_save;
10763                 goto interminable;
10764             }
10765
10766             CopLINE_set(PL_curcop, origline);
10767
10768             if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10769                 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10770                 /* ^That should be enough to avoid this needing to grow:  */
10771                 sv_catpvs(PL_linestr, "\n\0");
10772                 assert(s == SvPVX(PL_linestr));
10773                 PL_bufend = SvEND(PL_linestr);
10774             }
10775
10776             s = PL_bufptr;
10777             PL_parser->herelines++;
10778             PL_last_lop = PL_last_uni = NULL;
10779
10780 #ifndef PERL_STRICT_CR
10781             if (PL_bufend - PL_linestart >= 2) {
10782                 if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10783                     || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10784                 {
10785                     PL_bufend[-2] = '\n';
10786                     PL_bufend--;
10787                     SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10788                 }
10789                 else if (PL_bufend[-1] == '\r')
10790                     PL_bufend[-1] = '\n';
10791             }
10792             else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10793                 PL_bufend[-1] = '\n';
10794 #endif
10795
10796             if (indented && (PL_bufend-s) >= len) {
10797                 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10798
10799                 if (found) {
10800                     char *backup = found;
10801                     indent_len = 0;
10802
10803                     /* Only valid if it's preceded by whitespace only */
10804                     while (backup != s && --backup >= s) {
10805                         if (! SPACE_OR_TAB(*backup)) {
10806                             break;
10807                         }
10808                         indent_len++;
10809                     }
10810
10811                     /* All whitespace or none! */
10812                     if (backup == found || SPACE_OR_TAB(*backup)) {
10813                         Newx(indent, indent_len + 1, char);
10814                         memcpy(indent, backup, indent_len);
10815                         indent[indent_len] = 0;
10816                         SvREFCNT_dec(PL_linestr);
10817                         PL_linestr = linestr_save;
10818                         PL_linestart = SvPVX(linestr_save);
10819                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10820                         PL_oldbufptr = oldbufptr_save;
10821                         PL_oldoldbufptr = oldoldbufptr_save;
10822                         s = d;
10823                         break;
10824                     }
10825                 }
10826
10827                 /* Didn't find it */
10828                 sv_catsv(tmpstr,PL_linestr);
10829             }
10830             else {
10831                 if (*s == term && PL_bufend-s >= len
10832                     && memEQ(s,PL_tokenbuf + 1,len))
10833                 {
10834                     SvREFCNT_dec(PL_linestr);
10835                     PL_linestr = linestr_save;
10836                     PL_linestart = SvPVX(linestr_save);
10837                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10838                     PL_oldbufptr = oldbufptr_save;
10839                     PL_oldoldbufptr = oldoldbufptr_save;
10840                     s = d;
10841                     break;
10842                 }
10843                 else {
10844                     sv_catsv(tmpstr,PL_linestr);
10845                 }
10846             }
10847         } /* while (1) */
10848     }
10849
10850     PL_multi_end = origline + PL_parser->herelines;
10851
10852     if (indented && indent) {
10853         STRLEN linecount = 1;
10854         STRLEN herelen = SvCUR(tmpstr);
10855         char *ss = SvPVX(tmpstr);
10856         char *se = ss + herelen;
10857         SV *newstr = newSV(herelen+1);
10858         SvPOK_on(newstr);
10859
10860         /* Trim leading whitespace */
10861         while (ss < se) {
10862             /* newline only? Copy and move on */
10863             if (*ss == '\n') {
10864                 sv_catpvs(newstr,"\n");
10865                 ss++;
10866                 linecount++;
10867
10868             /* Found our indentation? Strip it */
10869             }
10870             else if (se - ss >= indent_len
10871                        && memEQ(ss, indent, indent_len))
10872             {
10873                 STRLEN le = 0;
10874                 ss += indent_len;
10875
10876                 while ((ss + le) < se && *(ss + le) != '\n')
10877                     le++;
10878
10879                 sv_catpvn(newstr, ss, le);
10880                 ss += le;
10881
10882             /* Line doesn't begin with our indentation? Croak */
10883             }
10884             else {
10885                 Safefree(indent);
10886                 Perl_croak(aTHX_
10887                     "Indentation on line %d of here-doc doesn't match delimiter",
10888                     (int)linecount
10889                 );
10890             }
10891         } /* while */
10892
10893         /* avoid sv_setsv() as we dont wan't to COW here */
10894         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10895         Safefree(indent);
10896         SvREFCNT_dec_NN(newstr);
10897     }
10898
10899     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10900         SvPV_shrink_to_cur(tmpstr);
10901     }
10902
10903     if (!IN_BYTES) {
10904         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10905             SvUTF8_on(tmpstr);
10906     }
10907
10908     PL_lex_stuff = tmpstr;
10909     pl_yylval.ival = op_type;
10910     return s;
10911
10912   interminable:
10913     if (indent)
10914         Safefree(indent);
10915     SvREFCNT_dec(tmpstr);
10916     CopLINE_set(PL_curcop, origline);
10917     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10918 }
10919
10920
10921 /* scan_inputsymbol
10922    takes: position of first '<' in input buffer
10923    returns: position of first char following the matching '>' in
10924             input buffer
10925    side-effects: pl_yylval and lex_op are set.
10926
10927    This code handles:
10928
10929    <>           read from ARGV
10930    <<>>         read from ARGV without magic open
10931    <FH>         read from filehandle
10932    <pkg::FH>    read from package qualified filehandle
10933    <pkg'FH>     read from package qualified filehandle
10934    <$fh>        read from filehandle in $fh
10935    <*.h>        filename glob
10936
10937 */
10938
10939 STATIC char *
10940 S_scan_inputsymbol(pTHX_ char *start)
10941 {
10942     char *s = start;            /* current position in buffer */
10943     char *end;
10944     I32 len;
10945     bool nomagicopen = FALSE;
10946     char *d = PL_tokenbuf;                                      /* start of temp holding space */
10947     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
10948
10949     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10950
10951     end = (char *) memchr(s, '\n', PL_bufend - s);
10952     if (!end)
10953         end = PL_bufend;
10954     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10955         nomagicopen = TRUE;
10956         *d = '\0';
10957         len = 0;
10958         s += 3;
10959     }
10960     else
10961         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
10962
10963     /* die if we didn't have space for the contents of the <>,
10964        or if it didn't end, or if we see a newline
10965     */
10966
10967     if (len >= (I32)sizeof PL_tokenbuf)
10968         Perl_croak(aTHX_ "Excessively long <> operator");
10969     if (s >= end)
10970         Perl_croak(aTHX_ "Unterminated <> operator");
10971
10972     s++;
10973
10974     /* check for <$fh>
10975        Remember, only scalar variables are interpreted as filehandles by
10976        this code.  Anything more complex (e.g., <$fh{$num}>) will be
10977        treated as a glob() call.
10978        This code makes use of the fact that except for the $ at the front,
10979        a scalar variable and a filehandle look the same.
10980     */
10981     if (*d == '$' && d[1]) d++;
10982
10983     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10984     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10985         d += UTF ? UTF8SKIP(d) : 1;
10986     }
10987
10988     /* If we've tried to read what we allow filehandles to look like, and
10989        there's still text left, then it must be a glob() and not a getline.
10990        Use scan_str to pull out the stuff between the <> and treat it
10991        as nothing more than a string.
10992     */
10993
10994     if (d - PL_tokenbuf != len) {
10995         pl_yylval.ival = OP_GLOB;
10996         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10997         if (!s)
10998            Perl_croak(aTHX_ "Glob not terminated");
10999         return s;
11000     }
11001     else {
11002         bool readline_overriden = FALSE;
11003         GV *gv_readline;
11004         /* we're in a filehandle read situation */
11005         d = PL_tokenbuf;
11006
11007         /* turn <> into <ARGV> */
11008         if (!len)
11009             Copy("ARGV",d,5,char);
11010
11011         /* Check whether readline() is overriden */
11012         if ((gv_readline = gv_override("readline",8)))
11013             readline_overriden = TRUE;
11014
11015         /* if <$fh>, create the ops to turn the variable into a
11016            filehandle
11017         */
11018         if (*d == '$') {
11019             /* try to find it in the pad for this block, otherwise find
11020                add symbol table ops
11021             */
11022             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11023             if (tmp != NOT_IN_PAD) {
11024                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11025                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11026                     HEK * const stashname = HvNAME_HEK(stash);
11027                     SV * const sym = sv_2mortal(newSVhek(stashname));
11028                     sv_catpvs(sym, "::");
11029                     sv_catpv(sym, d+1);
11030                     d = SvPVX(sym);
11031                     goto intro_sym;
11032                 }
11033                 else {
11034                     OP * const o = newOP(OP_PADSV, 0);
11035                     o->op_targ = tmp;
11036                     PL_lex_op = readline_overriden
11037                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11038                                 op_append_elem(OP_LIST, o,
11039                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11040                         : newUNOP(OP_READLINE, 0, o);
11041                 }
11042             }
11043             else {
11044                 GV *gv;
11045                 ++d;
11046               intro_sym:
11047                 gv = gv_fetchpv(d,
11048                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11049                                 SVt_PV);
11050                 PL_lex_op = readline_overriden
11051                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11052                             op_append_elem(OP_LIST,
11053                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11054                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11055                     : newUNOP(OP_READLINE, 0,
11056                             newUNOP(OP_RV2SV, 0,
11057                                 newGVOP(OP_GV, 0, gv)));
11058             }
11059             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11060             pl_yylval.ival = OP_NULL;
11061         }
11062
11063         /* If it's none of the above, it must be a literal filehandle
11064            (<Foo::BAR> or <FOO>) so build a simple readline OP */
11065         else {
11066             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11067             PL_lex_op = readline_overriden
11068                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11069                         op_append_elem(OP_LIST,
11070                             newGVOP(OP_GV, 0, gv),
11071                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11072                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11073             pl_yylval.ival = OP_NULL;
11074         }
11075     }
11076
11077     return s;
11078 }
11079
11080
11081 /* scan_str
11082    takes:
11083         start                   position in buffer
11084         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
11085                                 only if they are of the open/close form
11086         keep_delims             preserve the delimiters around the string
11087         re_reparse              compiling a run-time /(?{})/:
11088                                    collapse // to /,  and skip encoding src
11089         delimp                  if non-null, this is set to the position of
11090                                 the closing delimiter, or just after it if
11091                                 the closing and opening delimiters differ
11092                                 (i.e., the opening delimiter of a substitu-
11093                                 tion replacement)
11094    returns: position to continue reading from buffer
11095    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11096         updates the read buffer.
11097
11098    This subroutine pulls a string out of the input.  It is called for:
11099         q               single quotes           q(literal text)
11100         '               single quotes           'literal text'
11101         qq              double quotes           qq(interpolate $here please)
11102         "               double quotes           "interpolate $here please"
11103         qx              backticks               qx(/bin/ls -l)
11104         `               backticks               `/bin/ls -l`
11105         qw              quote words             @EXPORT_OK = qw( func() $spam )
11106         m//             regexp match            m/this/
11107         s///            regexp substitute       s/this/that/
11108         tr///           string transliterate    tr/this/that/
11109         y///            string transliterate    y/this/that/
11110         ($*@)           sub prototypes          sub foo ($)
11111         (stuff)         sub attr parameters     sub foo : attr(stuff)
11112         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
11113
11114    In most of these cases (all but <>, patterns and transliterate)
11115    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11116    calls scan_str().  s/// makes yylex() call scan_subst() which calls
11117    scan_str().  tr/// and y/// make yylex() call scan_trans() which
11118    calls scan_str().
11119
11120    It skips whitespace before the string starts, and treats the first
11121    character as the delimiter.  If the delimiter is one of ([{< then
11122    the corresponding "close" character )]}> is used as the closing
11123    delimiter.  It allows quoting of delimiters, and if the string has
11124    balanced delimiters ([{<>}]) it allows nesting.
11125
11126    On success, the SV with the resulting string is put into lex_stuff or,
11127    if that is already non-NULL, into lex_repl. The second case occurs only
11128    when parsing the RHS of the special constructs s/// and tr/// (y///).
11129    For convenience, the terminating delimiter character is stuffed into
11130    SvIVX of the SV.
11131 */
11132
11133 char *
11134 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11135                  char **delimp
11136     )
11137 {
11138     SV *sv;                     /* scalar value: string */
11139     const char *tmps;           /* temp string, used for delimiter matching */
11140     char *s = start;            /* current position in the buffer */
11141     char term;                  /* terminating character */
11142     char *to;                   /* current position in the sv's data */
11143     I32 brackets = 1;           /* bracket nesting level */
11144     bool d_is_utf8 = FALSE;     /* is there any utf8 content? */
11145     IV termcode;                /* terminating char. code */
11146     U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
11147     STRLEN termlen;             /* length of terminating string */
11148     line_t herelines;
11149
11150     /* The delimiters that have a mirror-image closing one */
11151     const char * opening_delims = "([{<";
11152     const char * closing_delims = ")]}>";
11153
11154     /* The only non-UTF character that isn't a stand alone grapheme is
11155      * white-space, hence can't be a delimiter. */
11156     const char * non_grapheme_msg = "Use of unassigned code point or"
11157                                     " non-standalone grapheme for a delimiter"
11158                                     " is not allowed";
11159     PERL_ARGS_ASSERT_SCAN_STR;
11160
11161     /* skip space before the delimiter */
11162     if (isSPACE(*s)) {
11163         s = skipspace(s);
11164     }
11165
11166     /* mark where we are, in case we need to report errors */
11167     CLINE;
11168
11169     /* after skipping whitespace, the next character is the terminator */
11170     term = *s;
11171     if (!UTF || UTF8_IS_INVARIANT(term)) {
11172         termcode = termstr[0] = term;
11173         termlen = 1;
11174     }
11175     else {
11176         termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
11177         if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
11178                                            (U8 *) s,
11179                                            (U8 *) PL_bufend,
11180                                                   termcode)))
11181         {
11182             yyerror(non_grapheme_msg);
11183         }
11184
11185         Copy(s, termstr, termlen, U8);
11186     }
11187
11188     /* mark where we are */
11189     PL_multi_start = CopLINE(PL_curcop);
11190     PL_multi_open = termcode;
11191     herelines = PL_parser->herelines;
11192
11193     /* If the delimiter has a mirror-image closing one, get it */
11194     if (term && (tmps = strchr(opening_delims, term))) {
11195         termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
11196     }
11197
11198     PL_multi_close = termcode;
11199
11200     if (PL_multi_open == PL_multi_close) {
11201         keep_bracketed_quoted = FALSE;
11202     }
11203
11204     /* create a new SV to hold the contents.  79 is the SV's initial length.
11205        What a random number. */
11206     sv = newSV_type(SVt_PVIV);
11207     SvGROW(sv, 80);
11208     SvIV_set(sv, termcode);
11209     (void)SvPOK_only(sv);               /* validate pointer */
11210
11211     /* move past delimiter and try to read a complete string */
11212     if (keep_delims)
11213         sv_catpvn(sv, s, termlen);
11214     s += termlen;
11215     for (;;) {
11216         /* extend sv if need be */
11217         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11218         /* set 'to' to the next character in the sv's string */
11219         to = SvPVX(sv)+SvCUR(sv);
11220
11221         /* if open delimiter is the close delimiter read unbridle */
11222         if (PL_multi_open == PL_multi_close) {
11223             for (; s < PL_bufend; s++,to++) {
11224                 /* embedded newlines increment the current line number */
11225                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11226                     COPLINE_INC_WITH_HERELINES;
11227                 /* handle quoted delimiters */
11228                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11229                     if (!keep_bracketed_quoted
11230                         && (s[1] == term
11231                             || (re_reparse && s[1] == '\\'))
11232                     )
11233                         s++;
11234                     else /* any other quotes are simply copied straight through */
11235                         *to++ = *s++;
11236                 }
11237                 /* terminate when run out of buffer (the for() condition), or
11238                    have found the terminator */
11239                 else if (*s == term) {  /* First byte of terminator matches */
11240                     if (termlen == 1)   /* If is the only byte, are done */
11241                         break;
11242
11243                     /* If the remainder of the terminator matches, also are
11244                      * done, after checking that is a separate grapheme */
11245                     if (   s + termlen <= PL_bufend
11246                         && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
11247                     {
11248                         if (   UTF
11249                             && UNLIKELY(! is_grapheme((U8 *) start,
11250                                                        (U8 *) s,
11251                                                        (U8 *) PL_bufend,
11252                                                               termcode)))
11253                         {
11254                             yyerror(non_grapheme_msg);
11255                         }
11256                         break;
11257                     }
11258                 }
11259                 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
11260                     d_is_utf8 = TRUE;
11261                 }
11262
11263                 *to = *s;
11264             }
11265         }
11266
11267         /* if the terminator isn't the same as the start character (e.g.,
11268            matched brackets), we have to allow more in the quoting, and
11269            be prepared for nested brackets.
11270         */
11271         else {
11272             /* read until we run out of string, or we find the terminator */
11273             for (; s < PL_bufend; s++,to++) {
11274                 /* embedded newlines increment the line count */
11275                 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11276                     COPLINE_INC_WITH_HERELINES;
11277                 /* backslashes can escape the open or closing characters */
11278                 if (*s == '\\' && s+1 < PL_bufend) {
11279                     if (!keep_bracketed_quoted
11280                        && ( ((UV)s[1] == PL_multi_open)
11281                          || ((UV)s[1] == PL_multi_close) ))
11282                     {
11283                         s++;
11284                     }
11285                     else
11286                         *to++ = *s++;
11287                 }
11288                 /* allow nested opens and closes */
11289                 else if ((UV)*s == PL_multi_close && --brackets <= 0)
11290                     break;
11291                 else if ((UV)*s == PL_multi_open)
11292                     brackets++;
11293                 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11294                     d_is_utf8 = TRUE;
11295                 *to = *s;
11296             }
11297         }
11298         /* terminate the copied string and update the sv's end-of-string */
11299         *to = '\0';
11300         SvCUR_set(sv, to - SvPVX_const(sv));
11301
11302         /*
11303          * this next chunk reads more into the buffer if we're not done yet
11304          */
11305
11306         if (s < PL_bufend)
11307             break;              /* handle case where we are done yet :-) */
11308
11309 #ifndef PERL_STRICT_CR
11310         if (to - SvPVX_const(sv) >= 2) {
11311             if (   (to[-2] == '\r' && to[-1] == '\n')
11312                 || (to[-2] == '\n' && to[-1] == '\r'))
11313             {
11314                 to[-2] = '\n';
11315                 to--;
11316                 SvCUR_set(sv, to - SvPVX_const(sv));
11317             }
11318             else if (to[-1] == '\r')
11319                 to[-1] = '\n';
11320         }
11321         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11322             to[-1] = '\n';
11323 #endif
11324
11325         /* if we're out of file, or a read fails, bail and reset the current
11326            line marker so we can report where the unterminated string began
11327         */
11328         COPLINE_INC_WITH_HERELINES;
11329         PL_bufptr = PL_bufend;
11330         if (!lex_next_chunk(0)) {
11331             sv_free(sv);
11332             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11333             return NULL;
11334         }
11335         s = start = PL_bufptr;
11336     }
11337
11338     /* at this point, we have successfully read the delimited string */
11339
11340     if (keep_delims)
11341             sv_catpvn(sv, s, termlen);
11342     s += termlen;
11343
11344     if (d_is_utf8)
11345         SvUTF8_on(sv);
11346
11347     PL_multi_end = CopLINE(PL_curcop);
11348     CopLINE_set(PL_curcop, PL_multi_start);
11349     PL_parser->herelines = herelines;
11350
11351     /* if we allocated too much space, give some back */
11352     if (SvCUR(sv) + 5 < SvLEN(sv)) {
11353         SvLEN_set(sv, SvCUR(sv) + 1);
11354         SvPV_renew(sv, SvLEN(sv));
11355     }
11356
11357     /* decide whether this is the first or second quoted string we've read
11358        for this op
11359     */
11360
11361     if (PL_lex_stuff)
11362         PL_parser->lex_sub_repl = sv;
11363     else
11364         PL_lex_stuff = sv;
11365     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
11366     return s;
11367 }
11368
11369 /*
11370   scan_num
11371   takes: pointer to position in buffer
11372   returns: pointer to new position in buffer
11373   side-effects: builds ops for the constant in pl_yylval.op
11374
11375   Read a number in any of the formats that Perl accepts:
11376
11377   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
11378   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
11379   0b[01](_?[01])*                                       binary integers
11380   0[0-7](_?[0-7])*                                      octal integers
11381   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
11382   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
11383
11384   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11385   thing it reads.
11386
11387   If it reads a number without a decimal point or an exponent, it will
11388   try converting the number to an integer and see if it can do so
11389   without loss of precision.
11390 */
11391
11392 char *
11393 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11394 {
11395     const char *s = start;      /* current position in buffer */
11396     char *d;                    /* destination in temp buffer */
11397     char *e;                    /* end of temp buffer */
11398     NV nv;                              /* number read, as a double */
11399     SV *sv = NULL;                      /* place to put the converted number */
11400     bool floatit;                       /* boolean: int or float? */
11401     const char *lastub = NULL;          /* position of last underbar */
11402     static const char* const number_too_long = "Number too long";
11403     bool warned_about_underscore = 0;
11404     I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11405 #define WARN_ABOUT_UNDERSCORE() \
11406         do { \
11407             if (!warned_about_underscore) { \
11408                 warned_about_underscore = 1; \
11409                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11410                                "Misplaced _ in number"); \
11411             } \
11412         } while(0)
11413     /* Hexadecimal floating point.
11414      *
11415      * In many places (where we have quads and NV is IEEE 754 double)
11416      * we can fit the mantissa bits of a NV into an unsigned quad.
11417      * (Note that UVs might not be quads even when we have quads.)
11418      * This will not work everywhere, though (either no quads, or
11419      * using long doubles), in which case we have to resort to NV,
11420      * which will probably mean horrible loss of precision due to
11421      * multiple fp operations. */
11422     bool hexfp = FALSE;
11423     int total_bits = 0;
11424     int significant_bits = 0;
11425 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11426 #  define HEXFP_UQUAD
11427     Uquad_t hexfp_uquad = 0;
11428     int hexfp_frac_bits = 0;
11429 #else
11430 #  define HEXFP_NV
11431     NV hexfp_nv = 0.0;
11432 #endif
11433     NV hexfp_mult = 1.0;
11434     UV high_non_zero = 0; /* highest digit */
11435     int non_zero_integer_digits = 0;
11436
11437     PERL_ARGS_ASSERT_SCAN_NUM;
11438
11439     /* We use the first character to decide what type of number this is */
11440
11441     switch (*s) {
11442     default:
11443         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11444
11445     /* if it starts with a 0, it could be an octal number, a decimal in
11446        0.13 disguise, or a hexadecimal number, or a binary number. */
11447     case '0':
11448         {
11449           /* variables:
11450              u          holds the "number so far"
11451              overflowed was the number more than we can hold?
11452
11453              Shift is used when we add a digit.  It also serves as an "are
11454              we in octal/hex/binary?" indicator to disallow hex characters
11455              when in octal mode.
11456            */
11457             NV n = 0.0;
11458             UV u = 0;
11459             bool overflowed = FALSE;
11460             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
11461             bool has_digs = FALSE;
11462             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11463             static const char* const bases[5] =
11464               { "", "binary", "", "octal", "hexadecimal" };
11465             static const char* const Bases[5] =
11466               { "", "Binary", "", "Octal", "Hexadecimal" };
11467             static const char* const maxima[5] =
11468               { "",
11469                 "0b11111111111111111111111111111111",
11470                 "",
11471                 "037777777777",
11472                 "0xffffffff" };
11473             const char *base, *Base, *max;
11474
11475             /* check for hex */
11476             if (isALPHA_FOLD_EQ(s[1], 'x')) {
11477                 shift = 4;
11478                 s += 2;
11479                 just_zero = FALSE;
11480             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11481                 shift = 1;
11482                 s += 2;
11483                 just_zero = FALSE;
11484             }
11485             /* check for a decimal in disguise */
11486             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11487                 goto decimal;
11488             /* so it must be octal */
11489             else {
11490                 shift = 3;
11491                 s++;
11492             }
11493
11494             if (*s == '_') {
11495                 WARN_ABOUT_UNDERSCORE();
11496                lastub = s++;
11497             }
11498
11499             base = bases[shift];
11500             Base = Bases[shift];
11501             max  = maxima[shift];
11502
11503             /* read the rest of the number */
11504             for (;;) {
11505                 /* x is used in the overflow test,
11506                    b is the digit we're adding on. */
11507                 UV x, b;
11508
11509                 switch (*s) {
11510
11511                 /* if we don't mention it, we're done */
11512                 default:
11513                     goto out;
11514
11515                 /* _ are ignored -- but warned about if consecutive */
11516                 case '_':
11517                     if (lastub && s == lastub + 1)
11518                         WARN_ABOUT_UNDERSCORE();
11519                     lastub = s++;
11520                     break;
11521
11522                 /* 8 and 9 are not octal */
11523                 case '8': case '9':
11524                     if (shift == 3)
11525                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11526                     /* FALLTHROUGH */
11527
11528                 /* octal digits */
11529                 case '2': case '3': case '4':
11530                 case '5': case '6': case '7':
11531                     if (shift == 1)
11532                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11533                     /* FALLTHROUGH */
11534
11535                 case '0': case '1':
11536                     b = *s++ & 15;              /* ASCII digit -> value of digit */
11537                     goto digit;
11538
11539                 /* hex digits */
11540                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11541                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11542                     /* make sure they said 0x */
11543                     if (shift != 4)
11544                         goto out;
11545                     b = (*s++ & 7) + 9;
11546
11547                     /* Prepare to put the digit we have onto the end
11548                        of the number so far.  We check for overflows.
11549                     */
11550
11551                   digit:
11552                     just_zero = FALSE;
11553                     has_digs = TRUE;
11554                     if (!overflowed) {
11555                         assert(shift >= 0);
11556                         x = u << shift; /* make room for the digit */
11557
11558                         total_bits += shift;
11559
11560                         if ((x >> shift) != u
11561                             && !(PL_hints & HINT_NEW_BINARY)) {
11562                             overflowed = TRUE;
11563                             n = (NV) u;
11564                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11565                                              "Integer overflow in %s number",
11566                                              base);
11567                         } else
11568                             u = x | b;          /* add the digit to the end */
11569                     }
11570                     if (overflowed) {
11571                         n *= nvshift[shift];
11572                         /* If an NV has not enough bits in its
11573                          * mantissa to represent an UV this summing of
11574                          * small low-order numbers is a waste of time
11575                          * (because the NV cannot preserve the
11576                          * low-order bits anyway): we could just
11577                          * remember when did we overflow and in the
11578                          * end just multiply n by the right
11579                          * amount. */
11580                         n += (NV) b;
11581                     }
11582
11583                     if (high_non_zero == 0 && b > 0)
11584                         high_non_zero = b;
11585
11586                     if (high_non_zero)
11587                         non_zero_integer_digits++;
11588
11589                     /* this could be hexfp, but peek ahead
11590                      * to avoid matching ".." */
11591                     if (UNLIKELY(HEXFP_PEEK(s))) {
11592                         goto out;
11593                     }
11594
11595                     break;
11596                 }
11597             }
11598
11599           /* if we get here, we had success: make a scalar value from
11600              the number.
11601           */
11602           out:
11603
11604             /* final misplaced underbar check */
11605             if (s[-1] == '_')
11606                 WARN_ABOUT_UNDERSCORE();
11607
11608             if (UNLIKELY(HEXFP_PEEK(s))) {
11609                 /* Do sloppy (on the underbars) but quick detection
11610                  * (and value construction) for hexfp, the decimal
11611                  * detection will shortly be more thorough with the
11612                  * underbar checks. */
11613                 const char* h = s;
11614                 significant_bits = non_zero_integer_digits * shift;
11615 #ifdef HEXFP_UQUAD
11616                 hexfp_uquad = u;
11617 #else /* HEXFP_NV */
11618                 hexfp_nv = u;
11619 #endif
11620                 /* Ignore the leading zero bits of
11621                  * the high (first) non-zero digit. */
11622                 if (high_non_zero) {
11623                     if (high_non_zero < 0x8)
11624                         significant_bits--;
11625                     if (high_non_zero < 0x4)
11626                         significant_bits--;
11627                     if (high_non_zero < 0x2)
11628                         significant_bits--;
11629                 }
11630
11631                 if (*h == '.') {
11632 #ifdef HEXFP_NV
11633                     NV nv_mult = 1.0;
11634 #endif
11635                     bool accumulate = TRUE;
11636                     U8 b;
11637                     int lim = 1 << shift;
11638                     for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11639                                *h == '_'); h++) {
11640                         if (isXDIGIT(*h)) {
11641                             significant_bits += shift;
11642 #ifdef HEXFP_UQUAD
11643                             if (accumulate) {
11644                                 if (significant_bits < NV_MANT_DIG) {
11645                                     /* We are in the long "run" of xdigits,
11646                                      * accumulate the full four bits. */
11647                                     assert(shift >= 0);
11648                                     hexfp_uquad <<= shift;
11649                                     hexfp_uquad |= b;
11650                                     hexfp_frac_bits += shift;
11651                                 } else if (significant_bits - shift < NV_MANT_DIG) {
11652                                     /* We are at a hexdigit either at,
11653                                      * or straddling, the edge of mantissa.
11654                                      * We will try grabbing as many as
11655                                      * possible bits. */
11656                                     int tail =
11657                                       significant_bits - NV_MANT_DIG;
11658                                     if (tail <= 0)
11659                                        tail += shift;
11660                                     assert(tail >= 0);
11661                                     hexfp_uquad <<= tail;
11662                                     assert((shift - tail) >= 0);
11663                                     hexfp_uquad |= b >> (shift - tail);
11664                                     hexfp_frac_bits += tail;
11665
11666                                     /* Ignore the trailing zero bits
11667                                      * of the last non-zero xdigit.
11668                                      *
11669                                      * The assumption here is that if
11670                                      * one has input of e.g. the xdigit
11671                                      * eight (0x8), there is only one
11672                                      * bit being input, not the full
11673                                      * four bits.  Conversely, if one
11674                                      * specifies a zero xdigit, the
11675                                      * assumption is that one really
11676                                      * wants all those bits to be zero. */
11677                                     if (b) {
11678                                         if ((b & 0x1) == 0x0) {
11679                                             significant_bits--;
11680                                             if ((b & 0x2) == 0x0) {
11681                                                 significant_bits--;
11682                                                 if ((b & 0x4) == 0x0) {
11683                                                     significant_bits--;
11684                                                 }
11685                                             }
11686                                         }
11687                                     }
11688
11689                                     accumulate = FALSE;
11690                                 }
11691                             } else {
11692                                 /* Keep skipping the xdigits, and
11693                                  * accumulating the significant bits,
11694                                  * but do not shift the uquad
11695                                  * (which would catastrophically drop
11696                                  * high-order bits) or accumulate the
11697                                  * xdigits anymore. */
11698                             }
11699 #else /* HEXFP_NV */
11700                             if (accumulate) {
11701                                 nv_mult /= nvshift[shift];
11702                                 if (nv_mult > 0.0)
11703                                     hexfp_nv += b * nv_mult;
11704                                 else
11705                                     accumulate = FALSE;
11706                             }
11707 #endif
11708                         }
11709                         if (significant_bits >= NV_MANT_DIG)
11710                             accumulate = FALSE;
11711                     }
11712                 }
11713
11714                 if ((total_bits > 0 || significant_bits > 0) &&
11715                     isALPHA_FOLD_EQ(*h, 'p')) {
11716                     bool negexp = FALSE;
11717                     h++;
11718                     if (*h == '+')
11719                         h++;
11720                     else if (*h == '-') {
11721                         negexp = TRUE;
11722                         h++;
11723                     }
11724                     if (isDIGIT(*h)) {
11725                         I32 hexfp_exp = 0;
11726                         while (isDIGIT(*h) || *h == '_') {
11727                             if (isDIGIT(*h)) {
11728                                 hexfp_exp *= 10;
11729                                 hexfp_exp += *h - '0';
11730 #ifdef NV_MIN_EXP
11731                                 if (negexp
11732                                     && -hexfp_exp < NV_MIN_EXP - 1) {
11733                                     /* NOTE: this means that the exponent
11734                                      * underflow warning happens for
11735                                      * the IEEE 754 subnormals (denormals),
11736                                      * because DBL_MIN_EXP etc are the lowest
11737                                      * possible binary (or, rather, DBL_RADIX-base)
11738                                      * exponent for normals, not subnormals.
11739                                      *
11740                                      * This may or may not be a good thing. */
11741                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11742                                                    "Hexadecimal float: exponent underflow");
11743                                     break;
11744                                 }
11745 #endif
11746 #ifdef NV_MAX_EXP
11747                                 if (!negexp
11748                                     && hexfp_exp > NV_MAX_EXP - 1) {
11749                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11750                                                    "Hexadecimal float: exponent overflow");
11751                                     break;
11752                                 }
11753 #endif
11754                             }
11755                             h++;
11756                         }
11757                         if (negexp)
11758                             hexfp_exp = -hexfp_exp;
11759 #ifdef HEXFP_UQUAD
11760                         hexfp_exp -= hexfp_frac_bits;
11761 #endif
11762                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
11763                         hexfp = TRUE;
11764                         goto decimal;
11765                     }
11766                 }
11767             }
11768
11769             if (shift != 3 && !has_digs) {
11770                 /* 0x or 0b with no digits, treat it as an error.
11771                    Originally this backed up the parse before the b or
11772                    x, but that has the potential for silent changes in
11773                    behaviour, like for: "0x.3" and "0x+$foo".
11774                 */
11775                 const char *d = s;
11776                 char *oldbp = PL_bufptr;
11777                 if (*d) ++d; /* so the user sees the bad non-digit */
11778                 PL_bufptr = (char *)d; /* so yyerror reports the context */
11779                 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
11780                                   shift == 4 ? "hexadecimal" : "binary"));
11781                 PL_bufptr = oldbp;
11782             }
11783
11784             if (overflowed) {
11785                 if (n > 4294967295.0)
11786                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11787                                    "%s number > %s non-portable",
11788                                    Base, max);
11789                 sv = newSVnv(n);
11790             }
11791             else {
11792 #if UVSIZE > 4
11793                 if (u > 0xffffffff)
11794                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11795                                    "%s number > %s non-portable",
11796                                    Base, max);
11797 #endif
11798                 sv = newSVuv(u);
11799             }
11800             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11801                 sv = new_constant(start, s - start, "integer",
11802                                   sv, NULL, NULL, 0, NULL);
11803             else if (PL_hints & HINT_NEW_BINARY)
11804                 sv = new_constant(start, s - start, "binary",
11805                                   sv, NULL, NULL, 0, NULL);
11806         }
11807         break;
11808
11809     /*
11810       handle decimal numbers.
11811       we're also sent here when we read a 0 as the first digit
11812     */
11813     case '1': case '2': case '3': case '4': case '5':
11814     case '6': case '7': case '8': case '9': case '.':
11815       decimal:
11816         d = PL_tokenbuf;
11817         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11818         floatit = FALSE;
11819         if (hexfp) {
11820             floatit = TRUE;
11821             *d++ = '0';
11822             switch (shift) {
11823             case 4:
11824                 *d++ = 'x';
11825                 s = start + 2;
11826                 break;
11827             case 3:
11828                 s = start + 1;
11829                 break;
11830             case 1:
11831                 *d++ = 'b';
11832                 s = start + 2;
11833                 break;
11834             default:
11835                 NOT_REACHED; /* NOTREACHED */
11836             }
11837         }
11838
11839         /* read next group of digits and _ and copy into d */
11840         while (isDIGIT(*s)
11841                || *s == '_'
11842                || UNLIKELY(hexfp && isXDIGIT(*s)))
11843         {
11844             /* skip underscores, checking for misplaced ones
11845                if -w is on
11846             */
11847             if (*s == '_') {
11848                 if (lastub && s == lastub + 1)
11849                     WARN_ABOUT_UNDERSCORE();
11850                 lastub = s++;
11851             }
11852             else {
11853                 /* check for end of fixed-length buffer */
11854                 if (d >= e)
11855                     Perl_croak(aTHX_ "%s", number_too_long);
11856                 /* if we're ok, copy the character */
11857                 *d++ = *s++;
11858             }
11859         }
11860
11861         /* final misplaced underbar check */
11862         if (lastub && s == lastub + 1)
11863             WARN_ABOUT_UNDERSCORE();
11864
11865         /* read a decimal portion if there is one.  avoid
11866            3..5 being interpreted as the number 3. followed
11867            by .5
11868         */
11869         if (*s == '.' && s[1] != '.') {
11870             floatit = TRUE;
11871             *d++ = *s++;
11872
11873             if (*s == '_') {
11874                 WARN_ABOUT_UNDERSCORE();
11875                 lastub = s;
11876             }
11877
11878             /* copy, ignoring underbars, until we run out of digits.
11879             */
11880             for (; isDIGIT(*s)
11881                    || *s == '_'
11882                    || UNLIKELY(hexfp && isXDIGIT(*s));
11883                  s++)
11884             {
11885                 /* fixed length buffer check */
11886                 if (d >= e)
11887                     Perl_croak(aTHX_ "%s", number_too_long);
11888                 if (*s == '_') {
11889                    if (lastub && s == lastub + 1)
11890                         WARN_ABOUT_UNDERSCORE();
11891                    lastub = s;
11892                 }
11893                 else
11894                     *d++ = *s;
11895             }
11896             /* fractional part ending in underbar? */
11897             if (s[-1] == '_')
11898                 WARN_ABOUT_UNDERSCORE();
11899             if (*s == '.' && isDIGIT(s[1])) {
11900                 /* oops, it's really a v-string, but without the "v" */
11901                 s = start;
11902                 goto vstring;
11903             }
11904         }
11905
11906         /* read exponent part, if present */
11907         if ((isALPHA_FOLD_EQ(*s, 'e')
11908               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11909             && memCHRs("+-0123456789_", s[1]))
11910         {
11911             int exp_digits = 0;
11912             const char *save_s = s;
11913             char * save_d = d;
11914
11915             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11916                ditto for p (hexfloats) */
11917             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11918                 /* At least some Mach atof()s don't grok 'E' */
11919                 *d++ = 'e';
11920             }
11921             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11922                 *d++ = 'p';
11923             }
11924
11925             s++;
11926
11927
11928             /* stray preinitial _ */
11929             if (*s == '_') {
11930                 WARN_ABOUT_UNDERSCORE();
11931                 lastub = s++;
11932             }
11933
11934             /* allow positive or negative exponent */
11935             if (*s == '+' || *s == '-')
11936                 *d++ = *s++;
11937
11938             /* stray initial _ */
11939             if (*s == '_') {
11940                 WARN_ABOUT_UNDERSCORE();
11941                 lastub = s++;
11942             }
11943
11944             /* read digits of exponent */
11945             while (isDIGIT(*s) || *s == '_') {
11946                 if (isDIGIT(*s)) {
11947                     ++exp_digits;
11948                     if (d >= e)
11949                         Perl_croak(aTHX_ "%s", number_too_long);
11950                     *d++ = *s++;
11951                 }
11952                 else {
11953                    if (((lastub && s == lastub + 1)
11954                         || (!isDIGIT(s[1]) && s[1] != '_')))
11955                         WARN_ABOUT_UNDERSCORE();
11956                    lastub = s++;
11957                 }
11958             }
11959
11960             if (!exp_digits) {
11961                 /* no exponent digits, the [eEpP] could be for something else,
11962                  * though in practice we don't get here for p since that's preparsed
11963                  * earlier, and results in only the 0xX being consumed, so behave similarly
11964                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
11965                  * next token.
11966                  */
11967                 s = save_s;
11968                 d = save_d;
11969             }
11970             else {
11971                 floatit = TRUE;
11972             }
11973         }
11974
11975
11976         /*
11977            We try to do an integer conversion first if no characters
11978            indicating "float" have been found.
11979          */
11980
11981         if (!floatit) {
11982             UV uv;
11983             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11984
11985             if (flags == IS_NUMBER_IN_UV) {
11986               if (uv <= IV_MAX)
11987                 sv = newSViv(uv); /* Prefer IVs over UVs. */
11988               else
11989                 sv = newSVuv(uv);
11990             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11991               if (uv <= (UV) IV_MIN)
11992                 sv = newSViv(-(IV)uv);
11993               else
11994                 floatit = TRUE;
11995             } else
11996               floatit = TRUE;
11997         }
11998         if (floatit) {
11999             /* terminate the string */
12000             *d = '\0';
12001             if (UNLIKELY(hexfp)) {
12002 #  ifdef NV_MANT_DIG
12003                 if (significant_bits > NV_MANT_DIG)
12004                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12005                                    "Hexadecimal float: mantissa overflow");
12006 #  endif
12007 #ifdef HEXFP_UQUAD
12008                 nv = hexfp_uquad * hexfp_mult;
12009 #else /* HEXFP_NV */
12010                 nv = hexfp_nv * hexfp_mult;
12011 #endif
12012             } else {
12013                 nv = Atof(PL_tokenbuf);
12014             }
12015             sv = newSVnv(nv);
12016         }
12017
12018         if ( floatit
12019              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12020             const char *const key = floatit ? "float" : "integer";
12021             const STRLEN keylen = floatit ? 5 : 7;
12022             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12023                                 key, keylen, sv, NULL, NULL, 0, NULL);
12024         }
12025         break;
12026
12027     /* if it starts with a v, it could be a v-string */
12028     case 'v':
12029     vstring:
12030                 sv = newSV(5); /* preallocate storage space */
12031                 ENTER_with_name("scan_vstring");
12032                 SAVEFREESV(sv);
12033                 s = scan_vstring(s, PL_bufend, sv);
12034                 SvREFCNT_inc_simple_void_NN(sv);
12035                 LEAVE_with_name("scan_vstring");
12036         break;
12037     }
12038
12039     /* make the op for the constant and return */
12040
12041     if (sv)
12042         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12043     else
12044         lvalp->opval = NULL;
12045
12046     return (char *)s;
12047 }
12048
12049 STATIC char *
12050 S_scan_formline(pTHX_ char *s)
12051 {
12052     SV * const stuff = newSVpvs("");
12053     bool needargs = FALSE;
12054     bool eofmt = FALSE;
12055
12056     PERL_ARGS_ASSERT_SCAN_FORMLINE;
12057
12058     while (!needargs) {
12059         char *eol;
12060         if (*s == '.') {
12061             char *t = s+1;
12062 #ifdef PERL_STRICT_CR
12063             while (SPACE_OR_TAB(*t))
12064                 t++;
12065 #else
12066             while (SPACE_OR_TAB(*t) || *t == '\r')
12067                 t++;
12068 #endif
12069             if (*t == '\n' || t == PL_bufend) {
12070                 eofmt = TRUE;
12071                 break;
12072             }
12073         }
12074         eol = (char *) memchr(s,'\n',PL_bufend-s);
12075         if (!eol++)
12076                 eol = PL_bufend;
12077         if (*s != '#') {
12078             char *t;
12079             for (t = s; t < eol; t++) {
12080                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12081                     needargs = FALSE;
12082                     goto enough;        /* ~~ must be first line in formline */
12083                 }
12084                 if (*t == '@' || *t == '^')
12085                     needargs = TRUE;
12086             }
12087             if (eol > s) {
12088                 sv_catpvn(stuff, s, eol-s);
12089 #ifndef PERL_STRICT_CR
12090                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12091                     char *end = SvPVX(stuff) + SvCUR(stuff);
12092                     end[-2] = '\n';
12093                     end[-1] = '\0';
12094                     SvCUR_set(stuff, SvCUR(stuff) - 1);
12095                 }
12096 #endif
12097             }
12098             else
12099               break;
12100         }
12101         s = (char*)eol;
12102         if ((PL_rsfp || PL_parser->filtered)
12103          && PL_parser->form_lex_state == LEX_NORMAL) {
12104             bool got_some;
12105             PL_bufptr = PL_bufend;
12106             COPLINE_INC_WITH_HERELINES;
12107             got_some = lex_next_chunk(0);
12108             CopLINE_dec(PL_curcop);
12109             s = PL_bufptr;
12110             if (!got_some)
12111                 break;
12112         }
12113         incline(s, PL_bufend);
12114     }
12115   enough:
12116     if (!SvCUR(stuff) || needargs)
12117         PL_lex_state = PL_parser->form_lex_state;
12118     if (SvCUR(stuff)) {
12119         PL_expect = XSTATE;
12120         if (needargs) {
12121             const char *s2 = s;
12122             while (isSPACE(*s2) && *s2 != '\n')
12123                 s2++;
12124             if (*s2 == '{') {
12125                 PL_expect = XTERMBLOCK;
12126                 NEXTVAL_NEXTTOKE.ival = 0;
12127                 force_next(DO);
12128             }
12129             NEXTVAL_NEXTTOKE.ival = 0;
12130             force_next(FORMLBRACK);
12131         }
12132         if (!IN_BYTES) {
12133             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12134                 SvUTF8_on(stuff);
12135         }
12136         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12137         force_next(THING);
12138     }
12139     else {
12140         SvREFCNT_dec(stuff);
12141         if (eofmt)
12142             PL_lex_formbrack = 0;
12143     }
12144     return s;
12145 }
12146
12147 I32
12148 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12149 {
12150     const I32 oldsavestack_ix = PL_savestack_ix;
12151     CV* const outsidecv = PL_compcv;
12152
12153     SAVEI32(PL_subline);
12154     save_item(PL_subname);
12155     SAVESPTR(PL_compcv);
12156
12157     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12158     CvFLAGS(PL_compcv) |= flags;
12159
12160     PL_subline = CopLINE(PL_curcop);
12161     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12162     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12163     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12164     if (outsidecv && CvPADLIST(outsidecv))
12165         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12166
12167     return oldsavestack_ix;
12168 }
12169
12170
12171 /* Do extra initialisation of a CV (typically one just created by
12172  * start_subparse()) if that CV is for a named sub
12173  */
12174
12175 void
12176 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12177 {
12178     PERL_ARGS_ASSERT_INIT_NAMED_CV;
12179
12180     if (nameop->op_type == OP_CONST) {
12181         const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12182         if (   strEQ(name, "BEGIN")
12183             || strEQ(name, "END")
12184             || strEQ(name, "INIT")
12185             || strEQ(name, "CHECK")
12186             || strEQ(name, "UNITCHECK")
12187         )
12188           CvSPECIAL_on(cv);
12189     }
12190     else
12191     /* State subs inside anonymous subs need to be
12192      clonable themselves. */
12193     if (   CvANON(CvOUTSIDE(cv))
12194         || CvCLONE(CvOUTSIDE(cv))
12195         || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12196                         CvOUTSIDE(cv)
12197                      ))[nameop->op_targ])
12198     )
12199       CvCLONE_on(cv);
12200 }
12201
12202
12203 static int
12204 S_yywarn(pTHX_ const char *const s, U32 flags)
12205 {
12206     PERL_ARGS_ASSERT_YYWARN;
12207
12208     PL_in_eval |= EVAL_WARNONLY;
12209     yyerror_pv(s, flags);
12210     return 0;
12211 }
12212
12213 void
12214 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12215 {
12216     PERL_ARGS_ASSERT_ABORT_EXECUTION;
12217
12218     if (PL_minus_c)
12219         Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12220     else {
12221         Perl_croak(aTHX_
12222                 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12223     }
12224     NOT_REACHED; /* NOTREACHED */
12225 }
12226
12227 void
12228 Perl_yyquit(pTHX)
12229 {
12230     /* Called, after at least one error has been found, to abort the parse now,
12231      * instead of trying to forge ahead */
12232
12233     yyerror_pvn(NULL, 0, 0);
12234 }
12235
12236 int
12237 Perl_yyerror(pTHX_ const char *const s)
12238 {
12239     PERL_ARGS_ASSERT_YYERROR;
12240     return yyerror_pvn(s, strlen(s), 0);
12241 }
12242
12243 int
12244 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12245 {
12246     PERL_ARGS_ASSERT_YYERROR_PV;
12247     return yyerror_pvn(s, strlen(s), flags);
12248 }
12249
12250 int
12251 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12252 {
12253     const char *context = NULL;
12254     int contlen = -1;
12255     SV *msg;
12256     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12257     int yychar  = PL_parser->yychar;
12258
12259     /* Output error message 's' with length 'len'.  'flags' are SV flags that
12260      * apply.  If the number of errors found is large enough, it abandons
12261      * parsing.  If 's' is NULL, there is no message, and it abandons
12262      * processing unconditionally */
12263
12264     if (s != NULL) {
12265         if (!yychar || (yychar == ';' && !PL_rsfp))
12266             sv_catpvs(where_sv, "at EOF");
12267         else if (   PL_oldoldbufptr
12268                  && PL_bufptr > PL_oldoldbufptr
12269                  && PL_bufptr - PL_oldoldbufptr < 200
12270                  && PL_oldoldbufptr != PL_oldbufptr
12271                  && PL_oldbufptr != PL_bufptr)
12272         {
12273             /*
12274                     Only for NetWare:
12275                     The code below is removed for NetWare because it
12276                     abends/crashes on NetWare when the script has error such as
12277                     not having the closing quotes like:
12278                         if ($var eq "value)
12279                     Checking of white spaces is anyway done in NetWare code.
12280             */
12281 #ifndef NETWARE
12282             while (isSPACE(*PL_oldoldbufptr))
12283                 PL_oldoldbufptr++;
12284 #endif
12285             context = PL_oldoldbufptr;
12286             contlen = PL_bufptr - PL_oldoldbufptr;
12287         }
12288         else if (  PL_oldbufptr
12289                 && PL_bufptr > PL_oldbufptr
12290                 && PL_bufptr - PL_oldbufptr < 200
12291                 && PL_oldbufptr != PL_bufptr) {
12292             /*
12293                     Only for NetWare:
12294                     The code below is removed for NetWare because it
12295                     abends/crashes on NetWare when the script has error such as
12296                     not having the closing quotes like:
12297                         if ($var eq "value)
12298                     Checking of white spaces is anyway done in NetWare code.
12299             */
12300 #ifndef NETWARE
12301             while (isSPACE(*PL_oldbufptr))
12302                 PL_oldbufptr++;
12303 #endif
12304             context = PL_oldbufptr;
12305             contlen = PL_bufptr - PL_oldbufptr;
12306         }
12307         else if (yychar > 255)
12308             sv_catpvs(where_sv, "next token ???");
12309         else if (yychar == YYEMPTY) {
12310             if (PL_lex_state == LEX_NORMAL)
12311                 sv_catpvs(where_sv, "at end of line");
12312             else if (PL_lex_inpat)
12313                 sv_catpvs(where_sv, "within pattern");
12314             else
12315                 sv_catpvs(where_sv, "within string");
12316         }
12317         else {
12318             sv_catpvs(where_sv, "next char ");
12319             if (yychar < 32)
12320                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12321             else if (isPRINT_LC(yychar)) {
12322                 const char string = yychar;
12323                 sv_catpvn(where_sv, &string, 1);
12324             }
12325             else
12326                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12327         }
12328         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12329         Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12330             OutCopFILE(PL_curcop),
12331             (IV)(PL_parser->preambling == NOLINE
12332                    ? CopLINE(PL_curcop)
12333                    : PL_parser->preambling));
12334         if (context)
12335             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12336                                  UTF8fARG(UTF, contlen, context));
12337         else
12338             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12339         if (   PL_multi_start < PL_multi_end
12340             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12341         {
12342             Perl_sv_catpvf(aTHX_ msg,
12343             "  (Might be a runaway multi-line %c%c string starting on"
12344             " line %" IVdf ")\n",
12345                     (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12346             PL_multi_end = 0;
12347         }
12348         if (PL_in_eval & EVAL_WARNONLY) {
12349             PL_in_eval &= ~EVAL_WARNONLY;
12350             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12351         }
12352         else {
12353             qerror(msg);
12354         }
12355     }
12356     if (s == NULL || PL_error_count >= 10) {
12357         const char * msg = "";
12358         const char * const name = OutCopFILE(PL_curcop);
12359
12360         if (PL_in_eval) {
12361             SV * errsv = ERRSV;
12362             if (SvCUR(errsv)) {
12363                 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12364             }
12365         }
12366
12367         if (s == NULL) {
12368             abort_execution(msg, name);
12369         }
12370         else {
12371             Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12372         }
12373     }
12374     PL_in_my = 0;
12375     PL_in_my_stash = NULL;
12376     return 0;
12377 }
12378
12379 STATIC char*
12380 S_swallow_bom(pTHX_ U8 *s)
12381 {
12382     const STRLEN slen = SvCUR(PL_linestr);
12383
12384     PERL_ARGS_ASSERT_SWALLOW_BOM;
12385
12386     switch (s[0]) {
12387     case 0xFF:
12388         if (s[1] == 0xFE) {
12389             /* UTF-16 little-endian? (or UTF-32LE?) */
12390             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
12391                 /* diag_listed_as: Unsupported script encoding %s */
12392                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12393 #ifndef PERL_NO_UTF16_FILTER
12394 #ifdef DEBUGGING
12395             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12396 #endif
12397             s += 2;
12398             if (PL_bufend > (char*)s) {
12399                 s = add_utf16_textfilter(s, TRUE);
12400             }
12401 #else
12402             /* diag_listed_as: Unsupported script encoding %s */
12403             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12404 #endif
12405         }
12406         break;
12407     case 0xFE:
12408         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
12409 #ifndef PERL_NO_UTF16_FILTER
12410 #ifdef DEBUGGING
12411             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12412 #endif
12413             s += 2;
12414             if (PL_bufend > (char *)s) {
12415                 s = add_utf16_textfilter(s, FALSE);
12416             }
12417 #else
12418             /* diag_listed_as: Unsupported script encoding %s */
12419             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12420 #endif
12421         }
12422         break;
12423     case BOM_UTF8_FIRST_BYTE: {
12424         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12425 #ifdef DEBUGGING
12426             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12427 #endif
12428             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
12429         }
12430         break;
12431     }
12432     case 0:
12433         if (slen > 3) {
12434              if (s[1] == 0) {
12435                   if (s[2] == 0xFE && s[3] == 0xFF) {
12436                        /* UTF-32 big-endian */
12437                        /* diag_listed_as: Unsupported script encoding %s */
12438                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12439                   }
12440              }
12441              else if (s[2] == 0 && s[3] != 0) {
12442                   /* Leading bytes
12443                    * 00 xx 00 xx
12444                    * are a good indicator of UTF-16BE. */
12445 #ifndef PERL_NO_UTF16_FILTER
12446 #ifdef DEBUGGING
12447                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12448 #endif
12449                   s = add_utf16_textfilter(s, FALSE);
12450 #else
12451                   /* diag_listed_as: Unsupported script encoding %s */
12452                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12453 #endif
12454              }
12455         }
12456         break;
12457
12458     default:
12459          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12460                   /* Leading bytes
12461                    * xx 00 xx 00
12462                    * are a good indicator of UTF-16LE. */
12463 #ifndef PERL_NO_UTF16_FILTER
12464 #ifdef DEBUGGING
12465               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12466 #endif
12467               s = add_utf16_textfilter(s, TRUE);
12468 #else
12469               /* diag_listed_as: Unsupported script encoding %s */
12470               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12471 #endif
12472          }
12473     }
12474     return (char*)s;
12475 }
12476
12477
12478 #ifndef PERL_NO_UTF16_FILTER
12479 static I32
12480 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12481 {
12482     SV *const filter = FILTER_DATA(idx);
12483     /* We re-use this each time round, throwing the contents away before we
12484        return.  */
12485     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12486     SV *const utf8_buffer = filter;
12487     IV status = IoPAGE(filter);
12488     const bool reverse = cBOOL(IoLINES(filter));
12489     I32 retval;
12490
12491     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12492
12493     /* As we're automatically added, at the lowest level, and hence only called
12494        from this file, we can be sure that we're not called in block mode. Hence
12495        don't bother writing code to deal with block mode.  */
12496     if (maxlen) {
12497         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12498     }
12499     if (status < 0) {
12500         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12501     }
12502     DEBUG_P(PerlIO_printf(Perl_debug_log,
12503                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12504                           FPTR2DPTR(void *, S_utf16_textfilter),
12505                           reverse ? 'l' : 'b', idx, maxlen, status,
12506                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12507
12508     while (1) {
12509         STRLEN chars;
12510         STRLEN have;
12511         Size_t newlen;
12512         U8 *end;
12513         /* First, look in our buffer of existing UTF-8 data:  */
12514         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12515
12516         if (nl) {
12517             ++nl;
12518         } else if (status == 0) {
12519             /* EOF */
12520             IoPAGE(filter) = 0;
12521             nl = SvEND(utf8_buffer);
12522         }
12523         if (nl) {
12524             STRLEN got = nl - SvPVX(utf8_buffer);
12525             /* Did we have anything to append?  */
12526             retval = got != 0;
12527             sv_catpvn(sv, SvPVX(utf8_buffer), got);
12528             /* Everything else in this code works just fine if SVp_POK isn't
12529                set.  This, however, needs it, and we need it to work, else
12530                we loop infinitely because the buffer is never consumed.  */
12531             sv_chop(utf8_buffer, nl);
12532             break;
12533         }
12534
12535         /* OK, not a complete line there, so need to read some more UTF-16.
12536            Read an extra octect if the buffer currently has an odd number. */
12537         while (1) {
12538             if (status <= 0)
12539                 break;
12540             if (SvCUR(utf16_buffer) >= 2) {
12541                 /* Location of the high octet of the last complete code point.
12542                    Gosh, UTF-16 is a pain. All the benefits of variable length,
12543                    *coupled* with all the benefits of partial reads and
12544                    endianness.  */
12545                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12546                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12547
12548                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12549                     break;
12550                 }
12551
12552                 /* We have the first half of a surrogate. Read more.  */
12553                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12554             }
12555
12556             status = FILTER_READ(idx + 1, utf16_buffer,
12557                                  160 + (SvCUR(utf16_buffer) & 1));
12558             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12559             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12560             if (status < 0) {
12561                 /* Error */
12562                 IoPAGE(filter) = status;
12563                 return status;
12564             }
12565         }
12566
12567         /* 'chars' isn't quite the right name, as code points above 0xFFFF
12568          * require 4 bytes per char */
12569         chars = SvCUR(utf16_buffer) >> 1;
12570         have = SvCUR(utf8_buffer);
12571
12572         /* Assume the worst case size as noted by the functions: twice the
12573          * number of input bytes */
12574         SvGROW(utf8_buffer, have + chars * 4 + 1);
12575
12576         if (reverse) {
12577             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12578                                          (U8*)SvPVX_const(utf8_buffer) + have,
12579                                          chars * 2, &newlen);
12580         } else {
12581             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12582                                 (U8*)SvPVX_const(utf8_buffer) + have,
12583                                 chars * 2, &newlen);
12584         }
12585         SvCUR_set(utf8_buffer, have + newlen);
12586         *end = '\0';
12587
12588         /* No need to keep this SV "well-formed" with a '\0' after the end, as
12589            it's private to us, and utf16_to_utf8{,reversed} take a
12590            (pointer,length) pair, rather than a NUL-terminated string.  */
12591         if(SvCUR(utf16_buffer) & 1) {
12592             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12593             SvCUR_set(utf16_buffer, 1);
12594         } else {
12595             SvCUR_set(utf16_buffer, 0);
12596         }
12597     }
12598     DEBUG_P(PerlIO_printf(Perl_debug_log,
12599                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12600                           status,
12601                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12602     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12603     return retval;
12604 }
12605
12606 static U8 *
12607 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12608 {
12609     SV *filter = filter_add(S_utf16_textfilter, NULL);
12610
12611     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12612
12613     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12614     SvPVCLEAR(filter);
12615     IoLINES(filter) = reversed;
12616     IoPAGE(filter) = 1; /* Not EOF */
12617
12618     /* Sadly, we have to return a valid pointer, come what may, so we have to
12619        ignore any error return from this.  */
12620     SvCUR_set(PL_linestr, 0);
12621     if (FILTER_READ(0, PL_linestr, 0)) {
12622         SvUTF8_on(PL_linestr);
12623     } else {
12624         SvUTF8_on(PL_linestr);
12625     }
12626     PL_bufend = SvEND(PL_linestr);
12627     return (U8*)SvPVX(PL_linestr);
12628 }
12629 #endif
12630
12631 /*
12632 Returns a pointer to the next character after the parsed
12633 vstring, as well as updating the passed in sv.
12634
12635 Function must be called like
12636
12637         sv = sv_2mortal(newSV(5));
12638         s = scan_vstring(s,e,sv);
12639
12640 where s and e are the start and end of the string.
12641 The sv should already be large enough to store the vstring
12642 passed in, for performance reasons.
12643
12644 This function may croak if fatal warnings are enabled in the
12645 calling scope, hence the sv_2mortal in the example (to prevent
12646 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
12647 sv_2mortal.
12648
12649 */
12650
12651 char *
12652 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12653 {
12654     const char *pos = s;
12655     const char *start = s;
12656
12657     PERL_ARGS_ASSERT_SCAN_VSTRING;
12658
12659     if (*pos == 'v') pos++;  /* get past 'v' */
12660     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12661         pos++;
12662     if ( *pos != '.') {
12663         /* this may not be a v-string if followed by => */
12664         const char *next = pos;
12665         while (next < e && isSPACE(*next))
12666             ++next;
12667         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12668             /* return string not v-string */
12669             sv_setpvn(sv,(char *)s,pos-s);
12670             return (char *)pos;
12671         }
12672     }
12673
12674     if (!isALPHA(*pos)) {
12675         U8 tmpbuf[UTF8_MAXBYTES+1];
12676
12677         if (*s == 'v')
12678             s++;  /* get past 'v' */
12679
12680         SvPVCLEAR(sv);
12681
12682         for (;;) {
12683             /* this is atoi() that tolerates underscores */
12684             U8 *tmpend;
12685             UV rev = 0;
12686             const char *end = pos;
12687             UV mult = 1;
12688             while (--end >= s) {
12689                 if (*end != '_') {
12690                     const UV orev = rev;
12691                     rev += (*end - '0') * mult;
12692                     mult *= 10;
12693                     if (orev > rev)
12694                         /* diag_listed_as: Integer overflow in %s number */
12695                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12696                                          "Integer overflow in decimal number");
12697                 }
12698             }
12699
12700             /* Append native character for the rev point */
12701             tmpend = uvchr_to_utf8(tmpbuf, rev);
12702             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12703             if (!UVCHR_IS_INVARIANT(rev))
12704                  SvUTF8_on(sv);
12705             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12706                  s = ++pos;
12707             else {
12708                  s = pos;
12709                  break;
12710             }
12711             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12712                  pos++;
12713         }
12714         SvPOK_on(sv);
12715         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12716         SvRMAGICAL_on(sv);
12717     }
12718     return (char *)s;
12719 }
12720
12721 int
12722 Perl_keyword_plugin_standard(pTHX_
12723         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12724 {
12725     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12726     PERL_UNUSED_CONTEXT;
12727     PERL_UNUSED_ARG(keyword_ptr);
12728     PERL_UNUSED_ARG(keyword_len);
12729     PERL_UNUSED_ARG(op_ptr);
12730     return KEYWORD_PLUGIN_DECLINE;
12731 }
12732
12733 /*
12734 =for apidoc wrap_keyword_plugin
12735
12736 Puts a C function into the chain of keyword plugins.  This is the
12737 preferred way to manipulate the L</PL_keyword_plugin> variable.
12738 C<new_plugin> is a pointer to the C function that is to be added to the
12739 keyword plugin chain, and C<old_plugin_p> points to the storage location
12740 where a pointer to the next function in the chain will be stored.  The
12741 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12742 while the value previously stored there is written to C<*old_plugin_p>.
12743
12744 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12745 to hook keyword parsing may find itself invoked more than once per
12746 process, typically in different threads.  To handle that situation, this
12747 function is idempotent.  The location C<*old_plugin_p> must initially
12748 (once per process) contain a null pointer.  A C variable of static
12749 duration (declared at file scope, typically also marked C<static> to give
12750 it internal linkage) will be implicitly initialised appropriately, if it
12751 does not have an explicit initialiser.  This function will only actually
12752 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
12753 function is also thread safe on the small scale.  It uses appropriate
12754 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12755
12756 When this function is called, the function referenced by C<new_plugin>
12757 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12758 In a threading situation, C<new_plugin> may be called immediately, even
12759 before this function has returned.  C<*old_plugin_p> will always be
12760 appropriately set before C<new_plugin> is called.  If C<new_plugin>
12761 decides not to do anything special with the identifier that it is given
12762 (which is the usual case for most calls to a keyword plugin), it must
12763 chain the plugin function referenced by C<*old_plugin_p>.
12764
12765 Taken all together, XS code to install a keyword plugin should typically
12766 look something like this:
12767
12768     static Perl_keyword_plugin_t next_keyword_plugin;
12769     static OP *my_keyword_plugin(pTHX_
12770         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12771     {
12772         if (memEQs(keyword_ptr, keyword_len,
12773                    "my_new_keyword")) {
12774             ...
12775         } else {
12776             return next_keyword_plugin(aTHX_
12777                 keyword_ptr, keyword_len, op_ptr);
12778         }
12779     }
12780     BOOT:
12781         wrap_keyword_plugin(my_keyword_plugin,
12782                             &next_keyword_plugin);
12783
12784 Direct access to L</PL_keyword_plugin> should be avoided.
12785
12786 =cut
12787 */
12788
12789 void
12790 Perl_wrap_keyword_plugin(pTHX_
12791     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12792 {
12793     dVAR;
12794
12795     PERL_UNUSED_CONTEXT;
12796     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12797     if (*old_plugin_p) return;
12798     KEYWORD_PLUGIN_MUTEX_LOCK;
12799     if (!*old_plugin_p) {
12800         *old_plugin_p = PL_keyword_plugin;
12801         PL_keyword_plugin = new_plugin;
12802     }
12803     KEYWORD_PLUGIN_MUTEX_UNLOCK;
12804 }
12805
12806 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12807 static void
12808 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12809 {
12810     SAVEI32(PL_lex_brackets);
12811     if (PL_lex_brackets > 100)
12812         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12813     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12814     SAVEI32(PL_lex_allbrackets);
12815     PL_lex_allbrackets = 0;
12816     SAVEI8(PL_lex_fakeeof);
12817     PL_lex_fakeeof = (U8)fakeeof;
12818     if(yyparse(gramtype) && !PL_parser->error_count)
12819         qerror(Perl_mess(aTHX_ "Parse error"));
12820 }
12821
12822 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12823 static OP *
12824 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12825 {
12826     OP *o;
12827     ENTER;
12828     SAVEVPTR(PL_eval_root);
12829     PL_eval_root = NULL;
12830     parse_recdescent(gramtype, fakeeof);
12831     o = PL_eval_root;
12832     LEAVE;
12833     return o;
12834 }
12835
12836 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12837 static OP *
12838 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12839 {
12840     OP *exprop;
12841     if (flags & ~PARSE_OPTIONAL)
12842         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12843     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12844     if (!exprop && !(flags & PARSE_OPTIONAL)) {
12845         if (!PL_parser->error_count)
12846             qerror(Perl_mess(aTHX_ "Parse error"));
12847         exprop = newOP(OP_NULL, 0);
12848     }
12849     return exprop;
12850 }
12851
12852 /*
12853 =for apidoc parse_arithexpr
12854
12855 Parse a Perl arithmetic expression.  This may contain operators of precedence
12856 down to the bit shift operators.  The expression must be followed (and thus
12857 terminated) either by a comparison or lower-precedence operator or by
12858 something that would normally terminate an expression such as semicolon.
12859 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12860 otherwise it is mandatory.  It is up to the caller to ensure that the
12861 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12862 the source of the code to be parsed and the lexical context for the
12863 expression.
12864
12865 The op tree representing the expression is returned.  If an optional
12866 expression is absent, a null pointer is returned, otherwise the pointer
12867 will be non-null.
12868
12869 If an error occurs in parsing or compilation, in most cases a valid op
12870 tree is returned anyway.  The error is reflected in the parser state,
12871 normally resulting in a single exception at the top level of parsing
12872 which covers all the compilation errors that occurred.  Some compilation
12873 errors, however, will throw an exception immediately.
12874
12875 =for apidoc Amnh||PARSE_OPTIONAL
12876
12877 =cut
12878
12879 */
12880
12881 OP *
12882 Perl_parse_arithexpr(pTHX_ U32 flags)
12883 {
12884     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12885 }
12886
12887 /*
12888 =for apidoc parse_termexpr
12889
12890 Parse a Perl term expression.  This may contain operators of precedence
12891 down to the assignment operators.  The expression must be followed (and thus
12892 terminated) either by a comma or lower-precedence operator or by
12893 something that would normally terminate an expression such as semicolon.
12894 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12895 otherwise it is mandatory.  It is up to the caller to ensure that the
12896 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12897 the source of the code to be parsed and the lexical context for the
12898 expression.
12899
12900 The op tree representing the expression is returned.  If an optional
12901 expression is absent, a null pointer is returned, otherwise the pointer
12902 will be non-null.
12903
12904 If an error occurs in parsing or compilation, in most cases a valid op
12905 tree is returned anyway.  The error is reflected in the parser state,
12906 normally resulting in a single exception at the top level of parsing
12907 which covers all the compilation errors that occurred.  Some compilation
12908 errors, however, will throw an exception immediately.
12909
12910 =cut
12911 */
12912
12913 OP *
12914 Perl_parse_termexpr(pTHX_ U32 flags)
12915 {
12916     return parse_expr(LEX_FAKEEOF_COMMA, flags);
12917 }
12918
12919 /*
12920 =for apidoc parse_listexpr
12921
12922 Parse a Perl list expression.  This may contain operators of precedence
12923 down to the comma operator.  The expression must be followed (and thus
12924 terminated) either by a low-precedence logic operator such as C<or> or by
12925 something that would normally terminate an expression such as semicolon.
12926 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12927 otherwise it is mandatory.  It is up to the caller to ensure that the
12928 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12929 the source of the code to be parsed and the lexical context for the
12930 expression.
12931
12932 The op tree representing the expression is returned.  If an optional
12933 expression is absent, a null pointer is returned, otherwise the pointer
12934 will be non-null.
12935
12936 If an error occurs in parsing or compilation, in most cases a valid op
12937 tree is returned anyway.  The error is reflected in the parser state,
12938 normally resulting in a single exception at the top level of parsing
12939 which covers all the compilation errors that occurred.  Some compilation
12940 errors, however, will throw an exception immediately.
12941
12942 =cut
12943 */
12944
12945 OP *
12946 Perl_parse_listexpr(pTHX_ U32 flags)
12947 {
12948     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12949 }
12950
12951 /*
12952 =for apidoc parse_fullexpr
12953
12954 Parse a single complete Perl expression.  This allows the full
12955 expression grammar, including the lowest-precedence operators such
12956 as C<or>.  The expression must be followed (and thus terminated) by a
12957 token that an expression would normally be terminated by: end-of-file,
12958 closing bracketing punctuation, semicolon, or one of the keywords that
12959 signals a postfix expression-statement modifier.  If C<flags> has the
12960 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12961 mandatory.  It is up to the caller to ensure that the dynamic parser
12962 state (L</PL_parser> et al) is correctly set to reflect the source of
12963 the code to be parsed and the lexical context for the expression.
12964
12965 The op tree representing the expression is returned.  If an optional
12966 expression is absent, a null pointer is returned, otherwise the pointer
12967 will be non-null.
12968
12969 If an error occurs in parsing or compilation, in most cases a valid op
12970 tree is returned anyway.  The error is reflected in the parser state,
12971 normally resulting in a single exception at the top level of parsing
12972 which covers all the compilation errors that occurred.  Some compilation
12973 errors, however, will throw an exception immediately.
12974
12975 =cut
12976 */
12977
12978 OP *
12979 Perl_parse_fullexpr(pTHX_ U32 flags)
12980 {
12981     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12982 }
12983
12984 /*
12985 =for apidoc parse_block
12986
12987 Parse a single complete Perl code block.  This consists of an opening
12988 brace, a sequence of statements, and a closing brace.  The block
12989 constitutes a lexical scope, so C<my> variables and various compile-time
12990 effects can be contained within it.  It is up to the caller to ensure
12991 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12992 reflect the source of the code to be parsed and the lexical context for
12993 the statement.
12994
12995 The op tree representing the code block is returned.  This is always a
12996 real op, never a null pointer.  It will normally be a C<lineseq> list,
12997 including C<nextstate> or equivalent ops.  No ops to construct any kind
12998 of runtime scope are included by virtue of it being a block.
12999
13000 If an error occurs in parsing or compilation, in most cases a valid op
13001 tree (most likely null) is returned anyway.  The error is reflected in
13002 the parser state, normally resulting in a single exception at the top
13003 level of parsing which covers all the compilation errors that occurred.
13004 Some compilation errors, however, will throw an exception immediately.
13005
13006 The C<flags> parameter is reserved for future use, and must always
13007 be zero.
13008
13009 =cut
13010 */
13011
13012 OP *
13013 Perl_parse_block(pTHX_ U32 flags)
13014 {
13015     if (flags)
13016         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13017     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13018 }
13019
13020 /*
13021 =for apidoc parse_barestmt
13022
13023 Parse a single unadorned Perl statement.  This may be a normal imperative
13024 statement or a declaration that has compile-time effect.  It does not
13025 include any label or other affixture.  It is up to the caller to ensure
13026 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13027 reflect the source of the code to be parsed and the lexical context for
13028 the statement.
13029
13030 The op tree representing the statement is returned.  This may be a
13031 null pointer if the statement is null, for example if it was actually
13032 a subroutine definition (which has compile-time side effects).  If not
13033 null, it will be ops directly implementing the statement, suitable to
13034 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
13035 equivalent op (except for those embedded in a scope contained entirely
13036 within the statement).
13037
13038 If an error occurs in parsing or compilation, in most cases a valid op
13039 tree (most likely null) is returned anyway.  The error is reflected in
13040 the parser state, normally resulting in a single exception at the top
13041 level of parsing which covers all the compilation errors that occurred.
13042 Some compilation errors, however, will throw an exception immediately.
13043
13044 The C<flags> parameter is reserved for future use, and must always
13045 be zero.
13046
13047 =cut
13048 */
13049
13050 OP *
13051 Perl_parse_barestmt(pTHX_ U32 flags)
13052 {
13053     if (flags)
13054         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13055     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13056 }
13057
13058 /*
13059 =for apidoc parse_label
13060
13061 Parse a single label, possibly optional, of the type that may prefix a
13062 Perl statement.  It is up to the caller to ensure that the dynamic parser
13063 state (L</PL_parser> et al) is correctly set to reflect the source of
13064 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13065 label is optional, otherwise it is mandatory.
13066
13067 The name of the label is returned in the form of a fresh scalar.  If an
13068 optional label is absent, a null pointer is returned.
13069
13070 If an error occurs in parsing, which can only occur if the label is
13071 mandatory, a valid label is returned anyway.  The error is reflected in
13072 the parser state, normally resulting in a single exception at the top
13073 level of parsing which covers all the compilation errors that occurred.
13074
13075 =cut
13076 */
13077
13078 SV *
13079 Perl_parse_label(pTHX_ U32 flags)
13080 {
13081     if (flags & ~PARSE_OPTIONAL)
13082         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13083     if (PL_nexttoke) {
13084         PL_parser->yychar = yylex();
13085         if (PL_parser->yychar == LABEL) {
13086             SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13087             PL_parser->yychar = YYEMPTY;
13088             cSVOPx(pl_yylval.opval)->op_sv = NULL;
13089             op_free(pl_yylval.opval);
13090             return labelsv;
13091         } else {
13092             yyunlex();
13093             goto no_label;
13094         }
13095     } else {
13096         char *s, *t;
13097         STRLEN wlen, bufptr_pos;
13098         lex_read_space(0);
13099         t = s = PL_bufptr;
13100         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13101             goto no_label;
13102         t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13103         if (word_takes_any_delimiter(s, wlen))
13104             goto no_label;
13105         bufptr_pos = s - SvPVX(PL_linestr);
13106         PL_bufptr = t;
13107         lex_read_space(LEX_KEEP_PREVIOUS);
13108         t = PL_bufptr;
13109         s = SvPVX(PL_linestr) + bufptr_pos;
13110         if (t[0] == ':' && t[1] != ':') {
13111             PL_oldoldbufptr = PL_oldbufptr;
13112             PL_oldbufptr = s;
13113             PL_bufptr = t+1;
13114             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13115         } else {
13116             PL_bufptr = s;
13117             no_label:
13118             if (flags & PARSE_OPTIONAL) {
13119                 return NULL;
13120             } else {
13121                 qerror(Perl_mess(aTHX_ "Parse error"));
13122                 return newSVpvs("x");
13123             }
13124         }
13125     }
13126 }
13127
13128 /*
13129 =for apidoc parse_fullstmt
13130
13131 Parse a single complete Perl statement.  This may be a normal imperative
13132 statement or a declaration that has compile-time effect, and may include
13133 optional labels.  It is up to the caller to ensure that the dynamic
13134 parser state (L</PL_parser> et al) is correctly set to reflect the source
13135 of the code to be parsed and the lexical context for the statement.
13136
13137 The op tree representing the statement is returned.  This may be a
13138 null pointer if the statement is null, for example if it was actually
13139 a subroutine definition (which has compile-time side effects).  If not
13140 null, it will be the result of a L</newSTATEOP> call, normally including
13141 a C<nextstate> or equivalent op.
13142
13143 If an error occurs in parsing or compilation, in most cases a valid op
13144 tree (most likely null) is returned anyway.  The error is reflected in
13145 the parser state, normally resulting in a single exception at the top
13146 level of parsing which covers all the compilation errors that occurred.
13147 Some compilation errors, however, will throw an exception immediately.
13148
13149 The C<flags> parameter is reserved for future use, and must always
13150 be zero.
13151
13152 =cut
13153 */
13154
13155 OP *
13156 Perl_parse_fullstmt(pTHX_ U32 flags)
13157 {
13158     if (flags)
13159         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13160     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13161 }
13162
13163 /*
13164 =for apidoc parse_stmtseq
13165
13166 Parse a sequence of zero or more Perl statements.  These may be normal
13167 imperative statements, including optional labels, or declarations
13168 that have compile-time effect, or any mixture thereof.  The statement
13169 sequence ends when a closing brace or end-of-file is encountered in a
13170 place where a new statement could have validly started.  It is up to
13171 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13172 is correctly set to reflect the source of the code to be parsed and the
13173 lexical context for the statements.
13174
13175 The op tree representing the statement sequence is returned.  This may
13176 be a null pointer if the statements were all null, for example if there
13177 were no statements or if there were only subroutine definitions (which
13178 have compile-time side effects).  If not null, it will be a C<lineseq>
13179 list, normally including C<nextstate> or equivalent ops.
13180
13181 If an error occurs in parsing or compilation, in most cases a valid op
13182 tree is returned anyway.  The error is reflected in the parser state,
13183 normally resulting in a single exception at the top level of parsing
13184 which covers all the compilation errors that occurred.  Some compilation
13185 errors, however, will throw an exception immediately.
13186
13187 The C<flags> parameter is reserved for future use, and must always
13188 be zero.
13189
13190 =cut
13191 */
13192
13193 OP *
13194 Perl_parse_stmtseq(pTHX_ U32 flags)
13195 {
13196     OP *stmtseqop;
13197     I32 c;
13198     if (flags)
13199         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13200     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13201     c = lex_peek_unichar(0);
13202     if (c != -1 && c != /*{*/'}')
13203         qerror(Perl_mess(aTHX_ "Parse error"));
13204     return stmtseqop;
13205 }
13206
13207 /*
13208 =for apidoc parse_subsignature
13209
13210 Parse a subroutine signature declaration. This is the contents of the
13211 parentheses following a named or anonymous subroutine declaration when the
13212 C<signatures> feature is enabled. Note that this function neither expects
13213 nor consumes the opening and closing parentheses around the signature; it
13214 is the caller's job to handle these.
13215
13216 This function must only be called during parsing of a subroutine; after
13217 L</start_subparse> has been called. It might allocate lexical variables on
13218 the pad for the current subroutine.
13219
13220 The op tree to unpack the arguments from the stack at runtime is returned.
13221 This op tree should appear at the beginning of the compiled function. The
13222 caller may wish to use L</op_append_list> to build their function body
13223 after it, or splice it together with the body before calling L</newATTRSUB>.
13224
13225 The C<flags> parameter is reserved for future use, and must always
13226 be zero.
13227
13228 =cut
13229 */
13230
13231 OP *
13232 Perl_parse_subsignature(pTHX_ U32 flags)
13233 {
13234     if (flags)
13235         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13236     return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13237 }
13238
13239 /*
13240  * ex: set ts=8 sts=4 sw=4 et:
13241  */