toke.c

   1 /*    toke.c
   2  *
   3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
   5  *
   6  *    You may distribute under the terms of either the GNU General Public
   7  *    License or the Artistic License, as specified in the README file.
   8  *
   9  */
  10
  11 /*
  12  *  'It all comes from here, the stench and the peril.'    --Frodo
  13  *
  14  *     [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
  15  */
  16
  17 /*
  18  * This file is the lexer for Perl.  It's closely linked to the
  19  * parser, perly.y.
  20  *
  21  * The main routine is yylex(), which returns the next token.
  22  */
  23
  24 /*
  25 =head1 Lexer interface
  26 This is the lower layer of the Perl parser, managing characters and tokens.
  27
  28 =for apidoc AmnU|yy_parser *|PL_parser
  29
  30 Pointer to a structure encapsulating the state of the parsing operation
  31 currently in progress.  The pointer can be locally changed to perform
  32 a nested parse without interfering with the state of an outer parse.
  33 Individual members of C<PL_parser> have their own documentation.
  34
  35 =cut
  36 */
  37
  38 #include "EXTERN.h"
  39 #define PERL_IN_TOKE_C
  40 #include "perl.h"
  41 #include "invlist_inline.h"
  42
  43 #define new_constant(a,b,c,d,e,f,g, h)  \
  44         S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
  45
  46 #define pl_yylval       (PL_parser->yylval)
  47
  48 /* XXX temporary backwards compatibility */
  49 #define PL_lex_brackets         (PL_parser->lex_brackets)
  50 #define PL_lex_allbrackets      (PL_parser->lex_allbrackets)
  51 #define PL_lex_fakeeof          (PL_parser->lex_fakeeof)
  52 #define PL_lex_brackstack       (PL_parser->lex_brackstack)
  53 #define PL_lex_casemods         (PL_parser->lex_casemods)
  54 #define PL_lex_casestack        (PL_parser->lex_casestack)
  55 #define PL_lex_dojoin           (PL_parser->lex_dojoin)
  56 #define PL_lex_formbrack        (PL_parser->lex_formbrack)
  57 #define PL_lex_inpat            (PL_parser->lex_inpat)
  58 #define PL_lex_inwhat           (PL_parser->lex_inwhat)
  59 #define PL_lex_op               (PL_parser->lex_op)
  60 #define PL_lex_repl             (PL_parser->lex_repl)
  61 #define PL_lex_starts           (PL_parser->lex_starts)
  62 #define PL_lex_stuff            (PL_parser->lex_stuff)
  63 #define PL_multi_start          (PL_parser->multi_start)
  64 #define PL_multi_open           (PL_parser->multi_open)
  65 #define PL_multi_close          (PL_parser->multi_close)
  66 #define PL_preambled            (PL_parser->preambled)
  67 #define PL_linestr              (PL_parser->linestr)
  68 #define PL_expect               (PL_parser->expect)
  69 #define PL_copline              (PL_parser->copline)
  70 #define PL_bufptr               (PL_parser->bufptr)
  71 #define PL_oldbufptr            (PL_parser->oldbufptr)
  72 #define PL_oldoldbufptr         (PL_parser->oldoldbufptr)
  73 #define PL_linestart            (PL_parser->linestart)
  74 #define PL_bufend               (PL_parser->bufend)
  75 #define PL_last_uni             (PL_parser->last_uni)
  76 #define PL_last_lop             (PL_parser->last_lop)
  77 #define PL_last_lop_op          (PL_parser->last_lop_op)
  78 #define PL_lex_state            (PL_parser->lex_state)
  79 #define PL_rsfp                 (PL_parser->rsfp)
  80 #define PL_rsfp_filters         (PL_parser->rsfp_filters)
  81 #define PL_in_my                (PL_parser->in_my)
  82 #define PL_in_my_stash          (PL_parser->in_my_stash)
  83 #define PL_tokenbuf             (PL_parser->tokenbuf)
  84 #define PL_multi_end            (PL_parser->multi_end)
  85 #define PL_error_count          (PL_parser->error_count)
  86
  87 #  define PL_nexttoke           (PL_parser->nexttoke)
  88 #  define PL_nexttype           (PL_parser->nexttype)
  89 #  define PL_nextval            (PL_parser->nextval)
  90
  91
  92 #define SvEVALED(sv) \
  93     (SvTYPE(sv) >= SVt_PVNV \
  94     && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
  95
  96 static const char ident_too_long[] = "Identifier too long";
  97 static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
  98
  99 #  define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
 100
 101 #define XENUMMASK  0x3f
 102 #define XFAKEEOF   0x40
 103 #define XFAKEBRACK 0x80
 104
 105 #ifdef USE_UTF8_SCRIPTS
 106 #   define UTF cBOOL(!IN_BYTES)
 107 #else
 108 #   define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
 109 #endif
 110
 111 /* The maximum number of characters preceding the unrecognized one to display */
 112 #define UNRECOGNIZED_PRECEDE_COUNT 10
 113
 114 /* In variables named $^X, these are the legal values for X.
 115  * 1999-02-27 mjd-perl-patch@plover.com */
 116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
 117
 118 /* Non-identifier plugin infix operators are allowed any printing character
 119  * except spaces, digits, or identifier chars
 120  */
 121 #define isPLUGINFIX(c) (c && !isSPACE(c) && !isDIGIT(c) && !isALPHA(c))
 122 /* Plugin infix operators may not begin with a quote symbol */
 123 #define isPLUGINFIX_FIRST(c) (isPLUGINFIX(c) && c != '"' && c != '\'')
 124
 125 #define PLUGINFIX_IS_ENABLED  UNLIKELY(PL_infix_plugin != &Perl_infix_plugin_standard)
 126
 127 #define SPACE_OR_TAB(c) isBLANK_A(c)
 128
 129 #define HEXFP_PEEK(s)     \
 130     (((s[0] == '.') && \
 131       (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
 132      isALPHA_FOLD_EQ(s[0], 'p'))
 133
 134 /* LEX_* are values for PL_lex_state, the state of the lexer.
 135  * They are arranged oddly so that the guard on the switch statement
 136  * can get by with a single comparison (if the compiler is smart enough).
 137  *
 138  * These values refer to the various states within a sublex parse,
 139  * i.e. within a double quotish string
 140  */
 141
 142 /* #define LEX_NOTPARSING               11 is done in perl.h. */
 143
 144 #define LEX_NORMAL              10 /* normal code (ie not within "...")     */
 145 #define LEX_INTERPNORMAL         9 /* code within a string, eg "$foo[$x+1]" */
 146 #define LEX_INTERPCASEMOD        8 /* expecting a \U, \Q or \E etc          */
 147 #define LEX_INTERPPUSH           7 /* starting a new sublex parse level     */
 148 #define LEX_INTERPSTART          6 /* expecting the start of a $var         */
 149
 150                                    /* at end of code, eg "$x" followed by:  */
 151 #define LEX_INTERPEND            5 /* ... eg not one of [, { or ->          */
 152 #define LEX_INTERPENDMAYBE       4 /* ... eg one of [, { or ->              */
 153
 154 #define LEX_INTERPCONCAT         3 /* expecting anything, eg at start of
 155                                         string or after \E, $foo, etc       */
 156 #define LEX_INTERPCONST          2 /* NOT USED */
 157 #define LEX_FORMLINE             1 /* expecting a format line               */
 158
 159 /* returned to yyl_try() to request it to retry the parse loop, expected to only
 160    be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
 161    can also return it.
 162
 163    yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
 164    other token values are 258 or higher (see perly.h), so -1 should be
 165    a safe value here.
 166 */
 167 #define YYL_RETRY (-1)
 168
 169 #ifdef DEBUGGING
 170 static const char* const lex_state_names[] = {
 171     "KNOWNEXT",
 172     "FORMLINE",
 173     "INTERPCONST",
 174     "INTERPCONCAT",
 175     "INTERPENDMAYBE",
 176     "INTERPEND",
 177     "INTERPSTART",
 178     "INTERPPUSH",
 179     "INTERPCASEMOD",
 180     "INTERPNORMAL",
 181     "NORMAL"
 182 };
 183 #endif
 184
 185 #include "keywords.h"
 186
 187 /* CLINE is a macro that ensures PL_copline has a sane value */
 188
 189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
 190
 191 /*
 192  * Convenience functions to return different tokens and prime the
 193  * lexer for the next token.  They all take an argument.
 194  *
 195  * TOKEN        : generic token (used for '(', DOLSHARP, etc)
 196  * OPERATOR     : generic operator
 197  * AOPERATOR    : assignment operator
 198  * PREBLOCK     : beginning the block after an if, while, foreach, ...
 199  * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
 200  * PREREF       : *EXPR where EXPR is not a simple identifier
 201  * TERM         : expression term
 202  * POSTDEREF    : postfix dereference (->$* ->@[...] etc.)
 203  * LOOPX        : loop exiting command (goto, last, dump, etc)
 204  * FTST         : file test operator
 205  * FUN0         : zero-argument function
 206  * FUN0OP       : zero-argument function, with its op created in this file
 207  * FUN1         : not used, except for not, which isn't a UNIOP
 208  * BOop         : bitwise or or xor
 209  * BAop         : bitwise and
 210  * BCop         : bitwise complement
 211  * SHop         : shift operator
 212  * PWop         : power operator
 213  * PMop         : pattern-matching operator
 214  * Aop          : addition-level operator
 215  * AopNOASSIGN  : addition-level operator that is never part of .=
 216  * Mop          : multiplication-level operator
 217  * ChEop        : chaining equality-testing operator
 218  * NCEop        : non-chaining comparison operator at equality precedence
 219  * ChRop        : chaining relational operator <= != gt
 220  * NCRop        : non-chaining relational operator isa
 221  *
 222  * Also see LOP and lop() below.
 223  */
 224
 225 #ifdef DEBUGGING /* Serve -DT. */
 226 #   define REPORT(retval) tokereport((I32)retval, &pl_yylval)
 227 #else
 228 #   define REPORT(retval) (retval)
 229 #endif
 230
 231 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
 232 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
 233 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
 234 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
 235 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
 236 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
 237 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
 238 #define PHASERBLOCK(f) return (pl_yylval.ival=f, PL_expect = XBLOCK, PL_bufptr = s, REPORT((int)PHASER))
 239 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
 240 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
 241                          pl_yylval.ival=f, \
 242                          PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
 243                          REPORT((int)LOOPEX))
 244 #define FTST(f)  return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
 245 #define FUN0(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
 246 #define FUN0OP(f)  return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
 247 #define FUN1(f)  return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
 248 #define BOop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
 249 #define BAop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
 250 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
 251                        REPORT(PERLY_TILDE)
 252 #define SHop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
 253 #define PWop(f)  return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
 254 #define PMop(f)  return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
 255 #define Aop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
 256 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
 257 #define Mop(f)   return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
 258 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
 259 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
 260 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
 261 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
 262
 263 /* This bit of chicanery makes a unary function followed by
 264  * a parenthesis into a function with one argument, highest precedence.
 265  * The UNIDOR macro is for unary functions that can be followed by the //
 266  * operator (such as C<shift // 0>).
 267  */
 268 #define UNI3(f,x,have_x) { \
 269         pl_yylval.ival = f; \
 270         if (have_x) PL_expect = x; \
 271         PL_bufptr = s; \
 272         PL_last_uni = PL_oldbufptr; \
 273         PL_last_lop_op = (f) < 0 ? -(f) : (f); \
 274         if (*s == '(') \
 275             return REPORT( (int)FUNC1 ); \
 276         s = skipspace(s); \
 277         return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
 278         }
 279 #define UNI(f)    UNI3(f,XTERM,1)
 280 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
 281 #define UNIPROTO(f,optional) { \
 282         if (optional) PL_last_uni = PL_oldbufptr; \
 283         OPERATOR(f); \
 284         }
 285
 286 #define UNIBRACK(f) UNI3(f,0,0)
 287
 288 /* return has special case parsing.
 289  *
 290  * List operators have low precedence. Functions have high precedence.
 291  * Every built in, *except return*, if written with () around its arguments, is
 292  * parsed as a function. Hence every other list built in:
 293  *
 294  * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
 295  * 429
 296  * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
 297  * 639
 298  * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
 299  * Useless use of a constant (2) in void context at -e line 1.
 300  * Useless use of a constant (4) in void context at -e line 1.
 301  *
 302  * $
 303  *
 304  * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
 305  * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
 306  *
 307  * Whereas return:
 308  *
 309  * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
 310  * 2
 311  * 4
 312  * 9
 313  * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
 314  * Useless use of a constant (2) in void context at -e line 1.
 315  * Useless use of a constant (4) in void context at -e line 1.
 316  * 9
 317  * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
 318  * Useless use of a constant (2) in void context at -e line 1.
 319  * Useless use of a constant (4) in void context at -e line 1.
 320  * 9
 321  * $
 322  *
 323  * and:
 324  * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
 325  * 2
 326  * 4
 327  * 6
 328  *
 329  * This last example is what we expect, but it's clearly inconsistent with how
 330  * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
 331  * followed.
 332  *
 333  *
 334  * Perl 3 attempted to be consistent:
 335  *
 336  *   The rules are more consistent about where parens are needed and
 337  *   where they are not.  In particular, unary operators and list operators now
 338  *   behave like functions if they're called like functions.
 339  *
 340  * However, the behaviour for return was reverted to the "old" parsing with
 341  * patches 9-12:
 342  *
 343  *   The construct
 344  *   return (1,2,3);
 345  *   did not do what was expected, since return was swallowing the
 346  *   parens in order to consider itself a function.  The solution,
 347  *   since return never wants any trailing expression such as
 348  *   return (1,2,3) + 2;
 349  *   is to simply make return an exception to the paren-makes-a-function
 350  *   rule, and treat it the way it always was, so that it doesn't
 351  *   strip the parens.
 352  *
 353  * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
 354  * LOP(OP_RETURN, XTERM);
 355  *
 356  * and constructs such as
 357  *
 358  *     return (Internals::V())[2]
 359  *
 360  * turn into syntax errors
 361  */
 362
 363 #define OLDLOP(f) \
 364         do { \
 365             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
 366                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
 367             pl_yylval.ival = (f); \
 368             PL_expect = XTERM; \
 369             PL_bufptr = s; \
 370             return (int)LSTOP; \
 371         } while(0)
 372
 373 #define COPLINE_INC_WITH_HERELINES                  \
 374     STMT_START {                                     \
 375         CopLINE_inc(PL_curcop);                       \
 376         if (PL_parser->herelines)                      \
 377             CopLINE(PL_curcop) += PL_parser->herelines, \
 378             PL_parser->herelines = 0;                    \
 379     } STMT_END
 380 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
 381  * is no sublex_push to follow. */
 382 #define COPLINE_SET_FROM_MULTI_END            \
 383     STMT_START {                               \
 384         CopLINE_set(PL_curcop, PL_multi_end);   \
 385         if (PL_multi_end != PL_multi_start)      \
 386             PL_parser->herelines = 0;             \
 387     } STMT_END
 388
 389
 390 /* A file-local structure for passing around information about subroutines and
 391  * related definable words */
 392 struct code {
 393     SV *sv;
 394     CV *cv;
 395     GV *gv, **gvp;
 396     OP *rv2cv_op;
 397     PADOFFSET off;
 398     bool lex;
 399 };
 400
 401 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
 402
 403 #ifdef DEBUGGING
 404
 405 /* how to interpret the pl_yylval associated with the token */
 406 enum token_type {
 407     TOKENTYPE_NONE,
 408     TOKENTYPE_IVAL,
 409     TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
 410     TOKENTYPE_PVAL,
 411     TOKENTYPE_OPVAL
 412 };
 413
 414 #define DEBUG_TOKEN(Type, Name)                                         \
 415     { Name, TOKENTYPE_##Type, #Name }
 416
 417 static struct debug_tokens {
 418     const int token;
 419     enum token_type type;
 420     const char *name;
 421 } const debug_tokens[] =
 422 {
 423     DEBUG_TOKEN (OPNUM, ADDOP),
 424     DEBUG_TOKEN (NONE,  ANDAND),
 425     DEBUG_TOKEN (NONE,  ANDOP),
 426     DEBUG_TOKEN (NONE,  ARROW),
 427     DEBUG_TOKEN (OPNUM, ASSIGNOP),
 428     DEBUG_TOKEN (OPNUM, BITANDOP),
 429     DEBUG_TOKEN (OPNUM, BITOROP),
 430     DEBUG_TOKEN (OPNUM, CHEQOP),
 431     DEBUG_TOKEN (OPNUM, CHRELOP),
 432     DEBUG_TOKEN (NONE,  COLONATTR),
 433     DEBUG_TOKEN (NONE,  DOLSHARP),
 434     DEBUG_TOKEN (NONE,  DORDOR),
 435     DEBUG_TOKEN (IVAL,  DOTDOT),
 436     DEBUG_TOKEN (NONE,  FORMLBRACK),
 437     DEBUG_TOKEN (NONE,  FORMRBRACK),
 438     DEBUG_TOKEN (OPNUM, FUNC),
 439     DEBUG_TOKEN (OPNUM, FUNC0),
 440     DEBUG_TOKEN (OPVAL, FUNC0OP),
 441     DEBUG_TOKEN (OPVAL, FUNC0SUB),
 442     DEBUG_TOKEN (OPNUM, FUNC1),
 443     DEBUG_TOKEN (NONE,  HASHBRACK),
 444     DEBUG_TOKEN (IVAL,  KW_CATCH),
 445     DEBUG_TOKEN (IVAL,  KW_CLASS),
 446     DEBUG_TOKEN (IVAL,  KW_CONTINUE),
 447     DEBUG_TOKEN (IVAL,  KW_DEFAULT),
 448     DEBUG_TOKEN (IVAL,  KW_DO),
 449     DEBUG_TOKEN (IVAL,  KW_ELSE),
 450     DEBUG_TOKEN (IVAL,  KW_ELSIF),
 451     DEBUG_TOKEN (IVAL,  KW_FIELD),
 452     DEBUG_TOKEN (IVAL,  KW_GIVEN),
 453     DEBUG_TOKEN (IVAL,  KW_FOR),
 454     DEBUG_TOKEN (IVAL,  KW_FORMAT),
 455     DEBUG_TOKEN (IVAL,  KW_IF),
 456     DEBUG_TOKEN (IVAL,  KW_LOCAL),
 457     DEBUG_TOKEN (IVAL,  KW_METHOD_anon),
 458     DEBUG_TOKEN (IVAL,  KW_METHOD_named),
 459     DEBUG_TOKEN (IVAL,  KW_MY),
 460     DEBUG_TOKEN (IVAL,  KW_PACKAGE),
 461     DEBUG_TOKEN (IVAL,  KW_REQUIRE),
 462     DEBUG_TOKEN (IVAL,  KW_SUB_anon),
 463     DEBUG_TOKEN (IVAL,  KW_SUB_anon_sig),
 464     DEBUG_TOKEN (IVAL,  KW_SUB_named),
 465     DEBUG_TOKEN (IVAL,  KW_SUB_named_sig),
 466     DEBUG_TOKEN (IVAL,  KW_TRY),
 467     DEBUG_TOKEN (IVAL,  KW_USE_or_NO),
 468     DEBUG_TOKEN (IVAL,  KW_UNLESS),
 469     DEBUG_TOKEN (IVAL,  KW_UNTIL),
 470     DEBUG_TOKEN (IVAL,  KW_WHEN),
 471     DEBUG_TOKEN (IVAL,  KW_WHILE),
 472     DEBUG_TOKEN (OPVAL, LABEL),
 473     DEBUG_TOKEN (OPNUM, LOOPEX),
 474     DEBUG_TOKEN (OPNUM, LSTOP),
 475     DEBUG_TOKEN (OPVAL, LSTOPSUB),
 476     DEBUG_TOKEN (OPNUM, MATCHOP),
 477     DEBUG_TOKEN (OPVAL, METHCALL),
 478     DEBUG_TOKEN (OPVAL, METHCALL0),
 479     DEBUG_TOKEN (OPNUM, MULOP),
 480     DEBUG_TOKEN (OPNUM, NCEQOP),
 481     DEBUG_TOKEN (OPNUM, NCRELOP),
 482     DEBUG_TOKEN (NONE,  NOAMP),
 483     DEBUG_TOKEN (NONE,  NOTOP),
 484     DEBUG_TOKEN (IVAL,  OROP),
 485     DEBUG_TOKEN (NONE,  OROR),
 486     DEBUG_TOKEN (IVAL,  PERLY_AMPERSAND),
 487     DEBUG_TOKEN (IVAL,  PERLY_BRACE_CLOSE),
 488     DEBUG_TOKEN (IVAL,  PERLY_BRACE_OPEN),
 489     DEBUG_TOKEN (IVAL,  PERLY_BRACKET_CLOSE),
 490     DEBUG_TOKEN (IVAL,  PERLY_BRACKET_OPEN),
 491     DEBUG_TOKEN (IVAL,  PERLY_COLON),
 492     DEBUG_TOKEN (IVAL,  PERLY_COMMA),
 493     DEBUG_TOKEN (IVAL,  PERLY_DOT),
 494     DEBUG_TOKEN (IVAL,  PERLY_EQUAL_SIGN),
 495     DEBUG_TOKEN (IVAL,  PERLY_EXCLAMATION_MARK),
 496     DEBUG_TOKEN (IVAL,  PERLY_MINUS),
 497     DEBUG_TOKEN (IVAL,  PERLY_PAREN_OPEN),
 498     DEBUG_TOKEN (IVAL,  PERLY_PERCENT_SIGN),
 499     DEBUG_TOKEN (IVAL,  PERLY_PLUS),
 500     DEBUG_TOKEN (IVAL,  PERLY_QUESTION_MARK),
 501     DEBUG_TOKEN (IVAL,  PERLY_SEMICOLON),
 502     DEBUG_TOKEN (IVAL,  PERLY_SLASH),
 503     DEBUG_TOKEN (IVAL,  PERLY_SNAIL),
 504     DEBUG_TOKEN (IVAL,  PERLY_STAR),
 505     DEBUG_TOKEN (IVAL,  PERLY_TILDE),
 506     DEBUG_TOKEN (OPVAL, PLUGEXPR),
 507     DEBUG_TOKEN (OPVAL, PLUGSTMT),
 508     DEBUG_TOKEN (PVAL,  PLUGIN_ADD_OP),
 509     DEBUG_TOKEN (PVAL,  PLUGIN_ASSIGN_OP),
 510     DEBUG_TOKEN (PVAL,  PLUGIN_HIGH_OP),
 511     DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_AND_OP),
 512     DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_OR_OP),
 513     DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_AND_LOW_OP),
 514     DEBUG_TOKEN (PVAL,  PLUGIN_LOGICAL_OR_LOW_OP),
 515     DEBUG_TOKEN (PVAL,  PLUGIN_LOW_OP),
 516     DEBUG_TOKEN (PVAL,  PLUGIN_MUL_OP),
 517     DEBUG_TOKEN (PVAL,  PLUGIN_POW_OP),
 518     DEBUG_TOKEN (PVAL,  PLUGIN_REL_OP),
 519     DEBUG_TOKEN (OPVAL, PMFUNC),
 520     DEBUG_TOKEN (NONE,  POSTJOIN),
 521     DEBUG_TOKEN (NONE,  POSTDEC),
 522     DEBUG_TOKEN (NONE,  POSTINC),
 523     DEBUG_TOKEN (OPNUM, POWOP),
 524     DEBUG_TOKEN (NONE,  PREDEC),
 525     DEBUG_TOKEN (NONE,  PREINC),
 526     DEBUG_TOKEN (OPVAL, PRIVATEREF),
 527     DEBUG_TOKEN (OPVAL, QWLIST),
 528     DEBUG_TOKEN (NONE,  REFGEN),
 529     DEBUG_TOKEN (OPNUM, SHIFTOP),
 530     DEBUG_TOKEN (NONE,  SUBLEXEND),
 531     DEBUG_TOKEN (NONE,  SUBLEXSTART),
 532     DEBUG_TOKEN (OPVAL, THING),
 533     DEBUG_TOKEN (NONE,  UMINUS),
 534     DEBUG_TOKEN (OPNUM, UNIOP),
 535     DEBUG_TOKEN (OPVAL, UNIOPSUB),
 536     DEBUG_TOKEN (OPVAL, BAREWORD),
 537     DEBUG_TOKEN (IVAL,  YADAYADA),
 538     { 0,                TOKENTYPE_NONE,         NULL }
 539 };
 540
 541 #undef DEBUG_TOKEN
 542
 543 /* dump the returned token in rv, plus any optional arg in pl_yylval */
 544
 545 STATIC int
 546 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
 547 {
 548     PERL_ARGS_ASSERT_TOKEREPORT;
 549
 550     if (DEBUG_T_TEST) {
 551         const char *name = NULL;
 552         enum token_type type = TOKENTYPE_NONE;
 553         const struct debug_tokens *p;
 554         SV* const report = newSVpvs("<== ");
 555
 556         for (p = debug_tokens; p->token; p++) {
 557             if (p->token == (int)rv) {
 558                 name = p->name;
 559                 type = p->type;
 560                 break;
 561             }
 562         }
 563         if (name)
 564             Perl_sv_catpv(aTHX_ report, name);
 565         else if (isGRAPH(rv))
 566         {
 567             Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
 568             if ((char)rv == 'p')
 569                 sv_catpvs(report, " (pending identifier)");
 570         }
 571         else if (!rv)
 572             sv_catpvs(report, "EOF");
 573         else
 574             Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
 575         switch (type) {
 576         case TOKENTYPE_NONE:
 577             break;
 578         case TOKENTYPE_IVAL:
 579             Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
 580             break;
 581         case TOKENTYPE_OPNUM:
 582             Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
 583                                     PL_op_name[lvalp->ival]);
 584             break;
 585         case TOKENTYPE_PVAL:
 586             Perl_sv_catpvf(aTHX_ report, "(pval=%p)", lvalp->pval);
 587             break;
 588         case TOKENTYPE_OPVAL:
 589             if (lvalp->opval) {
 590                 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
 591                                     PL_op_name[lvalp->opval->op_type]);
 592                 if (lvalp->opval->op_type == OP_CONST) {
 593                     Perl_sv_catpvf(aTHX_ report, " %s",
 594                         SvPEEK(cSVOPx_sv(lvalp->opval)));
 595                 }
 596
 597             }
 598             else
 599                 sv_catpvs(report, "(opval=null)");
 600             break;
 601         }
 602         PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
 603     };
 604     return (int)rv;
 605 }
 606
 607
 608 /* print the buffer with suitable escapes */
 609
 610 STATIC void
 611 S_printbuf(pTHX_ const char *const fmt, const char *const s)
 612 {
 613     SV* const tmp = newSVpvs("");
 614
 615     PERL_ARGS_ASSERT_PRINTBUF;
 616
 617     GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
 618     PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
 619     GCC_DIAG_RESTORE_STMT;
 620     SvREFCNT_dec(tmp);
 621 }
 622
 623 #endif
 624
 625 /*
 626  * S_ao
 627  *
 628  * This subroutine looks for an '=' next to the operator that has just been
 629  * parsed and turns it into an ASSIGNOP if it finds one.
 630  */
 631
 632 STATIC int
 633 S_ao(pTHX_ int toketype)
 634 {
 635     if (*PL_bufptr == '=') {
 636         PL_bufptr++;
 637
 638         switch (toketype) {
 639             case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
 640             case OROR:   pl_yylval.ival = OP_ORASSIGN;  break;
 641             case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
 642         }
 643
 644         toketype = ASSIGNOP;
 645     }
 646     return REPORT(toketype);
 647 }
 648
 649 /*
 650  * S_no_op
 651  * When Perl expects an operator and finds something else, no_op
 652  * prints the warning.  It always prints "<something> found where
 653  * operator expected.  It prints "Missing semicolon on previous line?"
 654  * if the surprise occurs at the start of the line.  "do you need to
 655  * predeclare ..." is printed out for code like "sub bar; foo bar $x"
 656  * where the compiler doesn't know if foo is a method call or a function.
 657  * It prints "Missing operator before end of line" if there's nothing
 658  * after the missing operator, or "... before <...>" if there is something
 659  * after the missing operator.
 660  *
 661  * PL_bufptr is expected to point to the start of the thing that was found,
 662  * and s after the next token or partial token.
 663  */
 664
 665 STATIC void
 666 S_no_op(pTHX_ const char *const what, char *s)
 667 {
 668     char * const oldbp = PL_bufptr;
 669     const bool is_first = (PL_oldbufptr == PL_linestart);
 670     SV *message = sv_2mortal( newSVpvf(
 671                    PERL_DIAG_WARN_SYNTAX("%s found where operator expected"),
 672                    what
 673                   ) );
 674
 675     PERL_ARGS_ASSERT_NO_OP;
 676
 677     if (!s)
 678         s = oldbp;
 679     else
 680         PL_bufptr = s;
 681
 682     if (ckWARN_d(WARN_SYNTAX)) {
 683         bool has_more = FALSE;
 684         if (is_first) {
 685             has_more = TRUE;
 686             sv_catpvs(message,
 687                     " (Missing semicolon on previous line?)");
 688         }
 689         else if (PL_oldoldbufptr) {
 690             /* yyerror (via yywarn) would do this itself, so we should too */
 691             const char *t;
 692             for (t = PL_oldoldbufptr;
 693                  t < PL_bufptr && isSPACE(*t);
 694                  t += UTF ? UTF8SKIP(t) : 1)
 695             {
 696                 NOOP;
 697             }
 698             /* see if we can identify the cause of the warning */
 699             if (isIDFIRST_lazy_if_safe(t,PL_bufend,UTF))
 700             {
 701                 const char *t_start= t;
 702                 for ( ;
 703                      (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
 704                      t += UTF ? UTF8SKIP(t) : 1)
 705                 {
 706                     NOOP;
 707                 }
 708                 if (t < PL_bufptr && isSPACE(*t)) {
 709                     has_more = TRUE;
 710                     sv_catpvf( message,
 711                             " (Do you need to predeclare \"%" UTF8f "\"?)",
 712                           UTF8fARG(UTF, t - t_start, t_start));
 713                 }
 714             }
 715         }
 716         if (!has_more) {
 717             const char *t= oldbp;
 718             assert(s >= oldbp);
 719             while (t < s && isSPACE(*t)) {
 720                 t += UTF ? UTF8SKIP(t) : 1;
 721             }
 722
 723             sv_catpvf(message,
 724                     " (Missing operator before \"%" UTF8f "\"?)",
 725                      UTF8fARG(UTF, s - t, t));
 726         }
 727     }
 728     yywarn(SvPV_nolen(message), UTF ? SVf_UTF8 : 0);
 729     PL_bufptr = oldbp;
 730 }
 731
 732 /*
 733  * S_missingterm
 734  * Complain about missing quote/regexp/heredoc terminator.
 735  * If it's called with NULL then it cauterizes the line buffer.
 736  * If we're in a delimited string and the delimiter is a control
 737  * character, it's reformatted into a two-char sequence like ^C.
 738  * This is fatal.
 739  */
 740
 741 STATIC void
 742 S_missingterm(pTHX_ char *s, STRLEN len)
 743 {
 744     char tmpbuf[UTF8_MAXBYTES + 1];
 745     char q;
 746     bool uni = FALSE;
 747     if (s) {
 748         char * const nl = (char *) my_memrchr(s, '\n', len);
 749         if (nl) {
 750             *nl = '\0';
 751             len = nl - s;
 752         }
 753         uni = UTF;
 754     }
 755     else if (PL_multi_close < 32) {
 756         *tmpbuf = '^';
 757         tmpbuf[1] = (char)toCTRL(PL_multi_close);
 758         tmpbuf[2] = '\0';
 759         s = tmpbuf;
 760         len = 2;
 761     }
 762     else {
 763         if (! UTF && LIKELY(PL_multi_close < 256)) {
 764             *tmpbuf = (char)PL_multi_close;
 765             tmpbuf[1] = '\0';
 766             len = 1;
 767         }
 768         else {
 769             char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
 770             *end = '\0';
 771             len = end - tmpbuf;
 772             uni = TRUE;
 773         }
 774         s = tmpbuf;
 775     }
 776     q = memchr(s, '"', len) ? '\'' : '"';
 777     Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c"
 778                      " anywhere before EOF", q, UTF8fARG(uni, len, s), q);
 779 }
 780
 781 #include "feature.h"
 782
 783 /*
 784  * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
 785  * utf16-to-utf8-reversed.
 786  */
 787
 788 #ifdef PERL_CR_FILTER
 789 static void
 790 strip_return(SV *sv)
 791 {
 792     const char *s = SvPVX_const(sv);
 793     const char * const e = s + SvCUR(sv);
 794
 795     PERL_ARGS_ASSERT_STRIP_RETURN;
 796
 797     /* outer loop optimized to do nothing if there are no CR-LFs */
 798     while (s < e) {
 799         if (*s++ == '\r' && *s == '\n') {
 800             /* hit a CR-LF, need to copy the rest */
 801             char *d = s - 1;
 802             *d++ = *s++;
 803             while (s < e) {
 804                 if (*s == '\r' && s[1] == '\n')
 805                     s++;
 806                 *d++ = *s++;
 807             }
 808             SvCUR(sv) -= s - d;
 809             return;
 810         }
 811     }
 812 }
 813
 814 STATIC I32
 815 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
 816 {
 817     const I32 count = FILTER_READ(idx+1, sv, maxlen);
 818     if (count > 0 && !maxlen)
 819         strip_return(sv);
 820     return count;
 821 }
 822 #endif
 823
 824 /*
 825 =for apidoc lex_start
 826
 827 Creates and initialises a new lexer/parser state object, supplying
 828 a context in which to lex and parse from a new source of Perl code.
 829 A pointer to the new state object is placed in L</PL_parser>.  An entry
 830 is made on the save stack so that upon unwinding, the new state object
 831 will be destroyed and the former value of L</PL_parser> will be restored.
 832 Nothing else need be done to clean up the parsing context.
 833
 834 The code to be parsed comes from C<line> and C<rsfp>.  C<line>, if
 835 non-null, provides a string (in SV form) containing code to be parsed.
 836 A copy of the string is made, so subsequent modification of C<line>
 837 does not affect parsing.  C<rsfp>, if non-null, provides an input stream
 838 from which code will be read to be parsed.  If both are non-null, the
 839 code in C<line> comes first and must consist of complete lines of input,
 840 and C<rsfp> supplies the remainder of the source.
 841
 842 The C<flags> parameter is reserved for future use.  Currently it is only
 843 used by perl internally, so extensions should always pass zero.
 844
 845 =cut
 846 */
 847
 848 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
 849    can share filters with the current parser.
 850    LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
 851    caller, hence isn't owned by the parser, so shouldn't be closed on parser
 852    destruction. This is used to handle the case of defaulting to reading the
 853    script from the standard input because no filename was given on the command
 854    line (without getting confused by situation where STDIN has been closed, so
 855    the script handle is opened on fd 0)  */
 856
 857 void
 858 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
 859 {
 860     const char *s = NULL;
 861     yy_parser *parser, *oparser;
 862
 863     if (flags && flags & ~LEX_START_FLAGS)
 864         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
 865
 866     /* create and initialise a parser */
 867
 868     Newxz(parser, 1, yy_parser);
 869     parser->old_parser = oparser = PL_parser;
 870     PL_parser = parser;
 871
 872     parser->stack = NULL;
 873     parser->stack_max1 = NULL;
 874     parser->ps = NULL;
 875
 876     /* on scope exit, free this parser and restore any outer one */
 877     SAVEPARSER(parser);
 878     parser->saved_curcop = PL_curcop;
 879
 880     /* initialise lexer state */
 881
 882     parser->nexttoke = 0;
 883     parser->error_count = oparser ? oparser->error_count : 0;
 884     parser->copline = parser->preambling = NOLINE;
 885     parser->lex_state = LEX_NORMAL;
 886     parser->expect = XSTATE;
 887     parser->rsfp = rsfp;
 888     parser->recheck_utf8_validity = TRUE;
 889     parser->rsfp_filters =
 890       !(flags & LEX_START_SAME_FILTER) || !oparser
 891         ? NULL
 892         : MUTABLE_AV(SvREFCNT_inc(
 893             oparser->rsfp_filters
 894              ? oparser->rsfp_filters
 895              : (oparser->rsfp_filters = newAV())
 896           ));
 897
 898     Newx(parser->lex_brackstack, 120, char);
 899     Newx(parser->lex_casestack, 12, char);
 900     *parser->lex_casestack = '\0';
 901     Newxz(parser->lex_shared, 1, LEXSHARED);
 902
 903     if (line) {
 904         Size_t len;
 905         const U8* first_bad_char_loc;
 906
 907         s = SvPV_const(line, len);
 908
 909         if (   SvUTF8(line)
 910             && UNLIKELY(! is_utf8_string_loc((U8 *) s,
 911                                              SvCUR(line),
 912                                              &first_bad_char_loc)))
 913         {
 914             _force_out_malformed_utf8_message(first_bad_char_loc,
 915                                               (U8 *) s + SvCUR(line),
 916                                               0,
 917                                               1 /* 1 means die */ );
 918             NOT_REACHED; /* NOTREACHED */
 919         }
 920
 921         parser->linestr = flags & LEX_START_COPIED
 922                             ? SvREFCNT_inc_simple_NN(line)
 923                             : newSVpvn_flags(s, len, SvUTF8(line));
 924         if (!rsfp)
 925             sv_catpvs(parser->linestr, "\n;");
 926     } else {
 927         parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
 928     }
 929
 930     parser->oldoldbufptr =
 931         parser->oldbufptr =
 932         parser->bufptr =
 933         parser->linestart = SvPVX(parser->linestr);
 934     parser->bufend = parser->bufptr + SvCUR(parser->linestr);
 935     parser->last_lop = parser->last_uni = NULL;
 936
 937     STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 938                                                         |LEX_DONT_CLOSE_RSFP));
 939     parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
 940                                                         |LEX_DONT_CLOSE_RSFP));
 941
 942     parser->in_pod = parser->filtered = 0;
 943 }
 944
 945
 946 /* delete a parser object */
 947
 948 void
 949 Perl_parser_free(pTHX_  const yy_parser *parser)
 950 {
 951     PERL_ARGS_ASSERT_PARSER_FREE;
 952
 953     PL_curcop = parser->saved_curcop;
 954     SvREFCNT_dec(parser->linestr);
 955
 956     if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
 957         PerlIO_clearerr(parser->rsfp);
 958     else if (parser->rsfp && (!parser->old_parser
 959           || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
 960         PerlIO_close(parser->rsfp);
 961     SvREFCNT_dec(parser->rsfp_filters);
 962     SvREFCNT_dec(parser->lex_stuff);
 963     SvREFCNT_dec(parser->lex_sub_repl);
 964
 965     Safefree(parser->lex_brackstack);
 966     Safefree(parser->lex_casestack);
 967     Safefree(parser->lex_shared);
 968     PL_parser = parser->old_parser;
 969     Safefree(parser);
 970 }
 971
 972 void
 973 Perl_parser_free_nexttoke_ops(pTHX_  yy_parser *parser, OPSLAB *slab)
 974 {
 975     I32 nexttoke = parser->nexttoke;
 976     PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
 977     while (nexttoke--) {
 978         if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
 979          && parser->nextval[nexttoke].opval
 980          && parser->nextval[nexttoke].opval->op_slabbed
 981          && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
 982             op_free(parser->nextval[nexttoke].opval);
 983             parser->nextval[nexttoke].opval = NULL;
 984         }
 985     }
 986 }
 987
 988
 989 /*
 990 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
 991
 992 Buffer scalar containing the chunk currently under consideration of the
 993 text currently being lexed.  This is always a plain string scalar (for
 994 which C<SvPOK> is true).  It is not intended to be used as a scalar by
 995 normal scalar means; instead refer to the buffer directly by the pointer
 996 variables described below.
 997
 998 The lexer maintains various C<char*> pointers to things in the
 999 C<PL_parser-E<gt>linestr> buffer.  If C<PL_parser-E<gt>linestr> is ever
1000 reallocated, all of these pointers must be updated.  Don't attempt to
1001 do this manually, but rather use L</lex_grow_linestr> if you need to
1002 reallocate the buffer.
1003
1004 The content of the text chunk in the buffer is commonly exactly one
1005 complete line of input, up to and including a newline terminator,
1006 but there are situations where it is otherwise.  The octets of the
1007 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
1008 The function L</lex_bufutf8> tells you which.  Do not use the C<SvUTF8>
1009 flag on this scalar, which may disagree with it.
1010
1011 For direct examination of the buffer, the variable
1012 L</PL_parser-E<gt>bufend> points to the end of the buffer.  The current
1013 lexing position is pointed to by L</PL_parser-E<gt>bufptr>.  Direct use
1014 of these pointers is usually preferable to examination of the scalar
1015 through normal scalar means.
1016
1017 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
1018
1019 Direct pointer to the end of the chunk of text currently being lexed, the
1020 end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
1021 + SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
1022 always located at the end of the buffer, and does not count as part of
1023 the buffer's contents.
1024
1025 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
1026
1027 Points to the current position of lexing inside the lexer buffer.
1028 Characters around this point may be freely examined, within
1029 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
1030 L</PL_parser-E<gt>bufend>.  The octets of the buffer may be intended to be
1031 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
1032
1033 Lexing code (whether in the Perl core or not) moves this pointer past
1034 the characters that it consumes.  It is also expected to perform some
1035 bookkeeping whenever a newline character is consumed.  This movement
1036 can be more conveniently performed by the function L</lex_read_to>,
1037 which handles newlines appropriately.
1038
1039 Interpretation of the buffer's octets can be abstracted out by
1040 using the slightly higher-level functions L</lex_peek_unichar> and
1041 L</lex_read_unichar>.
1042
1043 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
1044
1045 Points to the start of the current line inside the lexer buffer.
1046 This is useful for indicating at which column an error occurred, and
1047 not much else.  This must be updated by any lexing code that consumes
1048 a newline; the function L</lex_read_to> handles this detail.
1049
1050 =cut
1051 */
1052
1053 /*
1054 =for apidoc lex_bufutf8
1055
1056 Indicates whether the octets in the lexer buffer
1057 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1058 of Unicode characters.  If not, they should be interpreted as Latin-1
1059 characters.  This is analogous to the C<SvUTF8> flag for scalars.
1060
1061 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1062 contains valid UTF-8.  Lexing code must be robust in the face of invalid
1063 encoding.
1064
1065 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
1066 is significant, but not the whole story regarding the input character
1067 encoding.  Normally, when a file is being read, the scalar contains octets
1068 and its C<SvUTF8> flag is off, but the octets should be interpreted as
1069 UTF-8 if the C<use utf8> pragma is in effect.  During a string eval,
1070 however, the scalar may have the C<SvUTF8> flag on, and in this case its
1071 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1072 is in effect.  This logic may change in the future; use this function
1073 instead of implementing the logic yourself.
1074
1075 =cut
1076 */
1077
1078 bool
1079 Perl_lex_bufutf8(pTHX)
1080 {
1081     return UTF;
1082 }
1083
1084 /*
1085 =for apidoc lex_grow_linestr
1086
1087 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
1088 at least C<len> octets (including terminating C<NUL>).  Returns a
1089 pointer to the reallocated buffer.  This is necessary before making
1090 any direct modification of the buffer that would increase its length.
1091 L</lex_stuff_pvn> provides a more convenient way to insert text into
1092 the buffer.
1093
1094 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
1095 this function updates all of the lexer's variables that point directly
1096 into the buffer.
1097
1098 =cut
1099 */
1100
1101 char *
1102 Perl_lex_grow_linestr(pTHX_ STRLEN len)
1103 {
1104     SV *linestr;
1105     char *buf;
1106     STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1107     STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
1108     bool current;
1109
1110     linestr = PL_parser->linestr;
1111     buf = SvPVX(linestr);
1112     if (len <= SvLEN(linestr))
1113         return buf;
1114
1115     /* Is the lex_shared linestr SV the same as the current linestr SV?
1116      * Only in this case does re_eval_start need adjusting, since it
1117      * points within lex_shared->ls_linestr's buffer */
1118     current = (   !PL_parser->lex_shared->ls_linestr
1119                || linestr == PL_parser->lex_shared->ls_linestr);
1120
1121     bufend_pos = PL_parser->bufend - buf;
1122     bufptr_pos = PL_parser->bufptr - buf;
1123     oldbufptr_pos = PL_parser->oldbufptr - buf;
1124     oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1125     linestart_pos = PL_parser->linestart - buf;
1126     last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1127     last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1128     re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1129                             PL_parser->lex_shared->re_eval_start - buf : 0;
1130
1131     buf = sv_grow(linestr, len);
1132
1133     PL_parser->bufend = buf + bufend_pos;
1134     PL_parser->bufptr = buf + bufptr_pos;
1135     PL_parser->oldbufptr = buf + oldbufptr_pos;
1136     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1137     PL_parser->linestart = buf + linestart_pos;
1138     if (PL_parser->last_uni)
1139         PL_parser->last_uni = buf + last_uni_pos;
1140     if (PL_parser->last_lop)
1141         PL_parser->last_lop = buf + last_lop_pos;
1142     if (current && PL_parser->lex_shared->re_eval_start)
1143         PL_parser->lex_shared->re_eval_start  = buf + re_eval_start_pos;
1144     return buf;
1145 }
1146
1147 /*
1148 =for apidoc lex_stuff_pvn
1149
1150 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1151 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1152 reallocating the buffer if necessary.  This means that lexing code that
1153 runs later will see the characters as if they had appeared in the input.
1154 It is not recommended to do this as part of normal parsing, and most
1155 uses of this facility run the risk of the inserted characters being
1156 interpreted in an unintended manner.
1157
1158 The string to be inserted is represented by C<len> octets starting
1159 at C<pv>.  These octets are interpreted as either UTF-8 or Latin-1,
1160 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1161 The characters are recoded for the lexer buffer, according to how the
1162 buffer is currently being interpreted (L</lex_bufutf8>).  If a string
1163 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1164 function is more convenient.
1165
1166 =for apidoc Amnh||LEX_STUFF_UTF8
1167
1168 =cut
1169 */
1170
1171 void
1172 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1173 {
1174     char *bufptr;
1175     PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1176     if (flags & ~(LEX_STUFF_UTF8))
1177         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1178     if (UTF) {
1179         if (flags & LEX_STUFF_UTF8) {
1180             goto plain_copy;
1181         } else {
1182             STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1183                                                        (U8 *) pv + len);
1184             const char *p, *e = pv+len;;
1185             if (!highhalf)
1186                 goto plain_copy;
1187             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1188             bufptr = PL_parser->bufptr;
1189             Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1190             SvCUR_set(PL_parser->linestr,
1191                 SvCUR(PL_parser->linestr) + len+highhalf);
1192             PL_parser->bufend += len+highhalf;
1193             for (p = pv; p != e; p++) {
1194                 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1195             }
1196         }
1197     } else {
1198         if (flags & LEX_STUFF_UTF8) {
1199             STRLEN highhalf = 0;
1200             const char *p, *e = pv+len;
1201             for (p = pv; p != e; p++) {
1202                 U8 c = (U8)*p;
1203                 if (UTF8_IS_ABOVE_LATIN1(c)) {
1204                     Perl_croak(aTHX_ "Lexing code attempted to stuff "
1205                                 "non-Latin-1 character into Latin-1 input");
1206                 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1207                     p++;
1208                     highhalf++;
1209                 } else assert(UTF8_IS_INVARIANT(c));
1210             }
1211             if (!highhalf)
1212                 goto plain_copy;
1213             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1214             bufptr = PL_parser->bufptr;
1215             Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1216             SvCUR_set(PL_parser->linestr,
1217                 SvCUR(PL_parser->linestr) + len-highhalf);
1218             PL_parser->bufend += len-highhalf;
1219             p = pv;
1220             while (p < e) {
1221                 if (UTF8_IS_INVARIANT(*p)) {
1222                     *bufptr++ = *p;
1223                     p++;
1224                 }
1225                 else {
1226                     assert(p < e -1 );
1227                     *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1228                     p += 2;
1229                 }
1230             }
1231         } else {
1232           plain_copy:
1233             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1234             bufptr = PL_parser->bufptr;
1235             Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1236             SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1237             PL_parser->bufend += len;
1238             Copy(pv, bufptr, len, char);
1239         }
1240     }
1241 }
1242
1243 /*
1244 =for apidoc lex_stuff_pv
1245
1246 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1247 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1248 reallocating the buffer if necessary.  This means that lexing code that
1249 runs later will see the characters as if they had appeared in the input.
1250 It is not recommended to do this as part of normal parsing, and most
1251 uses of this facility run the risk of the inserted characters being
1252 interpreted in an unintended manner.
1253
1254 The string to be inserted is represented by octets starting at C<pv>
1255 and continuing to the first nul.  These octets are interpreted as either
1256 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1257 in C<flags>.  The characters are recoded for the lexer buffer, according
1258 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1259 If it is not convenient to nul-terminate a string to be inserted, the
1260 L</lex_stuff_pvn> function is more appropriate.
1261
1262 =cut
1263 */
1264
1265 void
1266 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1267 {
1268     PERL_ARGS_ASSERT_LEX_STUFF_PV;
1269     lex_stuff_pvn(pv, strlen(pv), flags);
1270 }
1271
1272 /*
1273 =for apidoc lex_stuff_sv
1274
1275 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1276 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1277 reallocating the buffer if necessary.  This means that lexing code that
1278 runs later will see the characters as if they had appeared in the input.
1279 It is not recommended to do this as part of normal parsing, and most
1280 uses of this facility run the risk of the inserted characters being
1281 interpreted in an unintended manner.
1282
1283 The string to be inserted is the string value of C<sv>.  The characters
1284 are recoded for the lexer buffer, according to how the buffer is currently
1285 being interpreted (L</lex_bufutf8>).  If a string to be inserted is
1286 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1287 need to construct a scalar.
1288
1289 =cut
1290 */
1291
1292 void
1293 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1294 {
1295     char *pv;
1296     STRLEN len;
1297     PERL_ARGS_ASSERT_LEX_STUFF_SV;
1298     if (flags)
1299         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1300     pv = SvPV(sv, len);
1301     lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1302 }
1303
1304 /*
1305 =for apidoc lex_unstuff
1306
1307 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1308 C<ptr>.  Text following C<ptr> will be moved, and the buffer shortened.
1309 This hides the discarded text from any lexing code that runs later,
1310 as if the text had never appeared.
1311
1312 This is not the normal way to consume lexed text.  For that, use
1313 L</lex_read_to>.
1314
1315 =cut
1316 */
1317
1318 void
1319 Perl_lex_unstuff(pTHX_ char *ptr)
1320 {
1321     char *buf, *bufend;
1322     STRLEN unstuff_len;
1323     PERL_ARGS_ASSERT_LEX_UNSTUFF;
1324     buf = PL_parser->bufptr;
1325     if (ptr < buf)
1326         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1327     if (ptr == buf)
1328         return;
1329     bufend = PL_parser->bufend;
1330     if (ptr > bufend)
1331         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1332     unstuff_len = ptr - buf;
1333     Move(ptr, buf, bufend+1-ptr, char);
1334     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1335     PL_parser->bufend = bufend - unstuff_len;
1336 }
1337
1338 /*
1339 =for apidoc lex_read_to
1340
1341 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1342 to C<ptr>.  This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1343 performing the correct bookkeeping whenever a newline character is passed.
1344 This is the normal way to consume lexed text.
1345
1346 Interpretation of the buffer's octets can be abstracted out by
1347 using the slightly higher-level functions L</lex_peek_unichar> and
1348 L</lex_read_unichar>.
1349
1350 =cut
1351 */
1352
1353 void
1354 Perl_lex_read_to(pTHX_ char *ptr)
1355 {
1356     char *s;
1357     PERL_ARGS_ASSERT_LEX_READ_TO;
1358     s = PL_parser->bufptr;
1359     if (ptr < s || ptr > PL_parser->bufend)
1360         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1361     for (; s != ptr; s++)
1362         if (*s == '\n') {
1363             COPLINE_INC_WITH_HERELINES;
1364             PL_parser->linestart = s+1;
1365         }
1366     PL_parser->bufptr = ptr;
1367 }
1368
1369 /*
1370 =for apidoc lex_discard_to
1371
1372 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1373 up to C<ptr>.  The remaining content of the buffer will be moved, and
1374 all pointers into the buffer updated appropriately.  C<ptr> must not
1375 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1376 it is not permitted to discard text that has yet to be lexed.
1377
1378 Normally it is not necessarily to do this directly, because it suffices to
1379 use the implicit discarding behaviour of L</lex_next_chunk> and things
1380 based on it.  However, if a token stretches across multiple lines,
1381 and the lexing code has kept multiple lines of text in the buffer for
1382 that purpose, then after completion of the token it would be wise to
1383 explicitly discard the now-unneeded earlier lines, to avoid future
1384 multi-line tokens growing the buffer without bound.
1385
1386 =cut
1387 */
1388
1389 void
1390 Perl_lex_discard_to(pTHX_ char *ptr)
1391 {
1392     char *buf;
1393     STRLEN discard_len;
1394     PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1395     buf = SvPVX(PL_parser->linestr);
1396     if (ptr < buf)
1397         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1398     if (ptr == buf)
1399         return;
1400     if (ptr > PL_parser->bufptr)
1401         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1402     discard_len = ptr - buf;
1403     if (PL_parser->oldbufptr < ptr)
1404         PL_parser->oldbufptr = ptr;
1405     if (PL_parser->oldoldbufptr < ptr)
1406         PL_parser->oldoldbufptr = ptr;
1407     if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1408         PL_parser->last_uni = NULL;
1409     if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1410         PL_parser->last_lop = NULL;
1411     Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1412     SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1413     PL_parser->bufend -= discard_len;
1414     PL_parser->bufptr -= discard_len;
1415     PL_parser->oldbufptr -= discard_len;
1416     PL_parser->oldoldbufptr -= discard_len;
1417     if (PL_parser->last_uni)
1418         PL_parser->last_uni -= discard_len;
1419     if (PL_parser->last_lop)
1420         PL_parser->last_lop -= discard_len;
1421 }
1422
1423 void
1424 Perl_notify_parser_that_changed_to_utf8(pTHX)
1425 {
1426     /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1427      * off to on.  At compile time, this has the effect of entering a 'use
1428      * utf8' section.  This means that any input was not previously checked for
1429      * UTF-8 (because it was off), but now we do need to check it, or our
1430      * assumptions about the input being sane could be wrong, and we could
1431      * segfault.  This routine just sets a flag so that the next time we look
1432      * at the input we do the well-formed UTF-8 check.  If we aren't in the
1433      * proper phase, there may not be a parser object, but if there is, setting
1434      * the flag is harmless */
1435
1436     if (PL_parser) {
1437         PL_parser->recheck_utf8_validity = TRUE;
1438     }
1439 }
1440
1441 /*
1442 =for apidoc lex_next_chunk
1443
1444 Reads in the next chunk of text to be lexed, appending it to
1445 L</PL_parser-E<gt>linestr>.  This should be called when lexing code has
1446 looked to the end of the current chunk and wants to know more.  It is
1447 usual, but not necessary, for lexing to have consumed the entirety of
1448 the current chunk at this time.
1449
1450 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1451 chunk (i.e., the current chunk has been entirely consumed), normally the
1452 current chunk will be discarded at the same time that the new chunk is
1453 read in.  If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1454 will not be discarded.  If the current chunk has not been entirely
1455 consumed, then it will not be discarded regardless of the flag.
1456
1457 Returns true if some new text was added to the buffer, or false if the
1458 buffer has reached the end of the input text.
1459
1460 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1461
1462 =cut
1463 */
1464
1465 #define LEX_FAKE_EOF 0x80000000
1466 #define LEX_NO_TERM  0x40000000 /* here-doc */
1467
1468 bool
1469 Perl_lex_next_chunk(pTHX_ U32 flags)
1470 {
1471     SV *linestr;
1472     char *buf;
1473     STRLEN old_bufend_pos, new_bufend_pos;
1474     STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1475     STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1476     bool got_some_for_debugger = 0;
1477     bool got_some;
1478
1479     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1480         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1481     if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1482         return FALSE;
1483     linestr = PL_parser->linestr;
1484     buf = SvPVX(linestr);
1485     if (!(flags & LEX_KEEP_PREVIOUS)
1486           && PL_parser->bufptr == PL_parser->bufend)
1487     {
1488         old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1489         linestart_pos = 0;
1490         if (PL_parser->last_uni != PL_parser->bufend)
1491             PL_parser->last_uni = NULL;
1492         if (PL_parser->last_lop != PL_parser->bufend)
1493             PL_parser->last_lop = NULL;
1494         last_uni_pos = last_lop_pos = 0;
1495         *buf = 0;
1496         SvCUR_set(linestr, 0);
1497     } else {
1498         old_bufend_pos = PL_parser->bufend - buf;
1499         bufptr_pos = PL_parser->bufptr - buf;
1500         oldbufptr_pos = PL_parser->oldbufptr - buf;
1501         oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1502         linestart_pos = PL_parser->linestart - buf;
1503         last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1504         last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1505     }
1506     if (flags & LEX_FAKE_EOF) {
1507         goto eof;
1508     } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1509         got_some = 0;
1510     } else if (filter_gets(linestr, old_bufend_pos)) {
1511         got_some = 1;
1512         got_some_for_debugger = 1;
1513     } else if (flags & LEX_NO_TERM) {
1514         got_some = 0;
1515     } else {
1516         if (!SvPOK(linestr))   /* can get undefined by filter_gets */
1517             SvPVCLEAR(linestr);
1518         eof:
1519         /* End of real input.  Close filehandle (unless it was STDIN),
1520          * then add implicit termination.
1521          */
1522         if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1523             PerlIO_clearerr(PL_parser->rsfp);
1524         else if (PL_parser->rsfp)
1525             (void)PerlIO_close(PL_parser->rsfp);
1526         PL_parser->rsfp = NULL;
1527         PL_parser->in_pod = PL_parser->filtered = 0;
1528         if (!PL_in_eval && PL_minus_p) {
1529             sv_catpvs(linestr,
1530                 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1531             PL_minus_n = PL_minus_p = 0;
1532         } else if (!PL_in_eval && PL_minus_n) {
1533             sv_catpvs(linestr, /*{*/";}");
1534             PL_minus_n = 0;
1535         } else
1536             sv_catpvs(linestr, ";");
1537         got_some = 1;
1538     }
1539     buf = SvPVX(linestr);
1540     new_bufend_pos = SvCUR(linestr);
1541     PL_parser->bufend = buf + new_bufend_pos;
1542     PL_parser->bufptr = buf + bufptr_pos;
1543
1544     if (UTF) {
1545         const U8* first_bad_char_loc;
1546         if (UNLIKELY(! is_utf8_string_loc(
1547                             (U8 *) PL_parser->bufptr,
1548                                    PL_parser->bufend - PL_parser->bufptr,
1549                                    &first_bad_char_loc)))
1550         {
1551             _force_out_malformed_utf8_message(first_bad_char_loc,
1552                                               (U8 *) PL_parser->bufend,
1553                                               0,
1554                                               1 /* 1 means die */ );
1555             NOT_REACHED; /* NOTREACHED */
1556         }
1557     }
1558
1559     PL_parser->oldbufptr = buf + oldbufptr_pos;
1560     PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1561     PL_parser->linestart = buf + linestart_pos;
1562     if (PL_parser->last_uni)
1563         PL_parser->last_uni = buf + last_uni_pos;
1564     if (PL_parser->last_lop)
1565         PL_parser->last_lop = buf + last_lop_pos;
1566     if (PL_parser->preambling != NOLINE) {
1567         CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1568         PL_parser->preambling = NOLINE;
1569     }
1570     if (   got_some_for_debugger
1571         && PERLDB_LINE_OR_SAVESRC
1572         && PL_curstash != PL_debstash)
1573     {
1574         /* debugger active and we're not compiling the debugger code,
1575          * so store the line into the debugger's array of lines
1576          */
1577         update_debugger_info(NULL, buf+old_bufend_pos,
1578             new_bufend_pos-old_bufend_pos);
1579     }
1580     return got_some;
1581 }
1582
1583 /*
1584 =for apidoc lex_peek_unichar
1585
1586 Looks ahead one (Unicode) character in the text currently being lexed.
1587 Returns the codepoint (unsigned integer value) of the next character,
1588 or -1 if lexing has reached the end of the input text.  To consume the
1589 peeked character, use L</lex_read_unichar>.
1590
1591 If the next character is in (or extends into) the next chunk of input
1592 text, the next chunk will be read in.  Normally the current chunk will be
1593 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1594 bit set, then the current chunk will not be discarded.
1595
1596 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1597 is encountered, an exception is generated.
1598
1599 =cut
1600 */
1601
1602 I32
1603 Perl_lex_peek_unichar(pTHX_ U32 flags)
1604 {
1605     char *s, *bufend;
1606     if (flags & ~(LEX_KEEP_PREVIOUS))
1607         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1608     s = PL_parser->bufptr;
1609     bufend = PL_parser->bufend;
1610     if (UTF) {
1611         U8 head;
1612         I32 unichar;
1613         STRLEN len, retlen;
1614         if (s == bufend) {
1615             if (!lex_next_chunk(flags))
1616                 return -1;
1617             s = PL_parser->bufptr;
1618             bufend = PL_parser->bufend;
1619         }
1620         head = (U8)*s;
1621         if (UTF8_IS_INVARIANT(head))
1622             return head;
1623         if (UTF8_IS_START(head)) {
1624             len = UTF8SKIP(&head);
1625             while ((STRLEN)(bufend-s) < len) {
1626                 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1627                     break;
1628                 s = PL_parser->bufptr;
1629                 bufend = PL_parser->bufend;
1630             }
1631         }
1632         unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1633         if (retlen == (STRLEN)-1) {
1634             _force_out_malformed_utf8_message((U8 *) s,
1635                                               (U8 *) bufend,
1636                                               0,
1637                                               1 /* 1 means die */ );
1638             NOT_REACHED; /* NOTREACHED */
1639         }
1640         return unichar;
1641     } else {
1642         if (s == bufend) {
1643             if (!lex_next_chunk(flags))
1644                 return -1;
1645             s = PL_parser->bufptr;
1646         }
1647         return (U8)*s;
1648     }
1649 }
1650
1651 /*
1652 =for apidoc lex_read_unichar
1653
1654 Reads the next (Unicode) character in the text currently being lexed.
1655 Returns the codepoint (unsigned integer value) of the character read,
1656 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1657 if lexing has reached the end of the input text.  To non-destructively
1658 examine the next character, use L</lex_peek_unichar> instead.
1659
1660 If the next character is in (or extends into) the next chunk of input
1661 text, the next chunk will be read in.  Normally the current chunk will be
1662 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1663 bit set, then the current chunk will not be discarded.
1664
1665 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1666 is encountered, an exception is generated.
1667
1668 =cut
1669 */
1670
1671 I32
1672 Perl_lex_read_unichar(pTHX_ U32 flags)
1673 {
1674     I32 c;
1675     if (flags & ~(LEX_KEEP_PREVIOUS))
1676         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1677     c = lex_peek_unichar(flags);
1678     if (c != -1) {
1679         if (c == '\n')
1680             COPLINE_INC_WITH_HERELINES;
1681         if (UTF)
1682             PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1683         else
1684             ++(PL_parser->bufptr);
1685     }
1686     return c;
1687 }
1688
1689 /*
1690 =for apidoc lex_read_space
1691
1692 Reads optional spaces, in Perl style, in the text currently being
1693 lexed.  The spaces may include ordinary whitespace characters and
1694 Perl-style comments.  C<#line> directives are processed if encountered.
1695 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1696 at a non-space character (or the end of the input text).
1697
1698 If spaces extend into the next chunk of input text, the next chunk will
1699 be read in.  Normally the current chunk will be discarded at the same
1700 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1701 chunk will not be discarded.
1702
1703 =cut
1704 */
1705
1706 #define LEX_NO_INCLINE    0x40000000
1707 #define LEX_NO_NEXT_CHUNK 0x80000000
1708
1709 void
1710 Perl_lex_read_space(pTHX_ U32 flags)
1711 {
1712     char *s, *bufend;
1713     const bool can_incline = !(flags & LEX_NO_INCLINE);
1714     bool need_incline = 0;
1715     if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1716         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1717     s = PL_parser->bufptr;
1718     bufend = PL_parser->bufend;
1719     while (1) {
1720         char c = *s;
1721         if (c == '#') {
1722             do {
1723                 c = *++s;
1724             } while (!(c == '\n' || (c == 0 && s == bufend)));
1725         } else if (c == '\n') {
1726             s++;
1727             if (can_incline) {
1728                 PL_parser->linestart = s;
1729                 if (s == bufend)
1730                     need_incline = 1;
1731                 else
1732                     incline(s, bufend);
1733             }
1734         } else if (isSPACE(c)) {
1735             s++;
1736         } else if (c == 0 && s == bufend) {
1737             bool got_more;
1738             line_t l;
1739             if (flags & LEX_NO_NEXT_CHUNK)
1740                 break;
1741             PL_parser->bufptr = s;
1742             l = CopLINE(PL_curcop);
1743             CopLINE(PL_curcop) += PL_parser->herelines + 1;
1744             got_more = lex_next_chunk(flags);
1745             CopLINE_set(PL_curcop, l);
1746             s = PL_parser->bufptr;
1747             bufend = PL_parser->bufend;
1748             if (!got_more)
1749                 break;
1750             if (can_incline && need_incline && PL_parser->rsfp) {
1751                 incline(s, bufend);
1752                 need_incline = 0;
1753             }
1754         } else if (!c) {
1755             s++;
1756         } else {
1757             break;
1758         }
1759     }
1760     PL_parser->bufptr = s;
1761 }
1762
1763 /*
1764
1765 =for apidoc validate_proto
1766
1767 This function performs syntax checking on a prototype, C<proto>.
1768 If C<warn> is true, any illegal characters or mismatched brackets
1769 will trigger illegalproto warnings, declaring that they were
1770 detected in the prototype for C<name>.
1771
1772 The return value is C<true> if this is a valid prototype, and
1773 C<false> if it is not, regardless of whether C<warn> was C<true> or
1774 C<false>.
1775
1776 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1777
1778 =cut
1779
1780  */
1781
1782 bool
1783 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1784 {
1785     STRLEN len, origlen;
1786     char *p;
1787     bool bad_proto = FALSE;
1788     bool in_brackets = FALSE;
1789     bool after_slash = FALSE;
1790     char greedy_proto = ' ';
1791     bool proto_after_greedy_proto = FALSE;
1792     bool must_be_last = FALSE;
1793     bool underscore = FALSE;
1794     bool bad_proto_after_underscore = FALSE;
1795
1796     PERL_ARGS_ASSERT_VALIDATE_PROTO;
1797
1798     if (!proto)
1799         return TRUE;
1800
1801     p = SvPV(proto, len);
1802     origlen = len;
1803     for (; len--; p++) {
1804         if (!isSPACE(*p)) {
1805             if (must_be_last)
1806                 proto_after_greedy_proto = TRUE;
1807             if (underscore) {
1808                 if (!memCHRs(";@%", *p))
1809                     bad_proto_after_underscore = TRUE;
1810                 underscore = FALSE;
1811             }
1812             if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1813                 bad_proto = TRUE;
1814             }
1815             else {
1816                 if (*p == '[')
1817                     in_brackets = TRUE;
1818                 else if (*p == ']')
1819                     in_brackets = FALSE;
1820                 else if ((*p == '@' || *p == '%')
1821                          && !after_slash
1822                          && !in_brackets )
1823                 {
1824                     must_be_last = TRUE;
1825                     greedy_proto = *p;
1826                 }
1827                 else if (*p == '_')
1828                     underscore = TRUE;
1829             }
1830             if (*p == '\\')
1831                 after_slash = TRUE;
1832             else
1833                 after_slash = FALSE;
1834         }
1835     }
1836
1837     if (warn) {
1838         SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1839         p -= origlen;
1840         p = SvUTF8(proto)
1841             ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1842                              origlen, UNI_DISPLAY_ISPRINT)
1843             : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1844
1845         if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1846             SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1847             sv_catpvs(name2, "::");
1848             sv_catsv(name2, (SV *)name);
1849             name = name2;
1850         }
1851
1852         if (proto_after_greedy_proto)
1853             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1854                         "Prototype after '%c' for %" SVf " : %s",
1855                         greedy_proto, SVfARG(name), p);
1856         if (in_brackets)
1857             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1858                         "Missing ']' in prototype for %" SVf " : %s",
1859                         SVfARG(name), p);
1860         if (bad_proto)
1861             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1862                         "Illegal character in prototype for %" SVf " : %s",
1863                         SVfARG(name), p);
1864         if (bad_proto_after_underscore)
1865             Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1866                         "Illegal character after '_' in prototype for %" SVf " : %s",
1867                         SVfARG(name), p);
1868     }
1869
1870     return (! (proto_after_greedy_proto || bad_proto) );
1871 }
1872
1873 /*
1874  * S_incline
1875  * This subroutine has nothing to do with tilting, whether at windmills
1876  * or pinball tables.  Its name is short for "increment line".  It
1877  * increments the current line number in CopLINE(PL_curcop) and checks
1878  * to see whether the line starts with a comment of the form
1879  *    # line 500 "foo.pm"
1880  * If so, it sets the current line number and file to the values in the comment.
1881  */
1882
1883 STATIC void
1884 S_incline(pTHX_ const char *s, const char *end)
1885 {
1886     const char *t;
1887     const char *n;
1888     const char *e;
1889     line_t line_num;
1890     UV uv;
1891
1892     PERL_ARGS_ASSERT_INCLINE;
1893
1894     assert(end >= s);
1895
1896     COPLINE_INC_WITH_HERELINES;
1897     if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1898      && s+1 == PL_bufend && *s == ';') {
1899         /* fake newline in string eval */
1900         CopLINE_dec(PL_curcop);
1901         return;
1902     }
1903     if (*s++ != '#')
1904         return;
1905     while (SPACE_OR_TAB(*s))
1906         s++;
1907     if (memBEGINs(s, (STRLEN) (end - s), "line"))
1908         s += sizeof("line") - 1;
1909     else
1910         return;
1911     if (SPACE_OR_TAB(*s))
1912         s++;
1913     else
1914         return;
1915     while (SPACE_OR_TAB(*s))
1916         s++;
1917     if (!isDIGIT(*s))
1918         return;
1919
1920     n = s;
1921     while (isDIGIT(*s))
1922         s++;
1923     if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1924         return;
1925     while (SPACE_OR_TAB(*s))
1926         s++;
1927     if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1928         s++;
1929         e = t + 1;
1930     }
1931     else {
1932         t = s;
1933         while (*t && !isSPACE(*t))
1934             t++;
1935         e = t;
1936     }
1937     while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1938         e++;
1939     if (*e != '\n' && *e != '\0')
1940         return;         /* false alarm */
1941
1942     if (!grok_atoUV(n, &uv, &e))
1943         return;
1944     line_num = ((line_t)uv) - 1;
1945
1946     if (t - s > 0) {
1947         const STRLEN len = t - s;
1948
1949         if (!PL_rsfp && !PL_parser->filtered) {
1950             /* must copy *{"::_<(eval N)[oldfilename:L]"}
1951              * to *{"::_<newfilename"} */
1952             /* However, the long form of evals is only turned on by the
1953                debugger - usually they're "(eval %lu)" */
1954             GV * const cfgv = CopFILEGV(PL_curcop);
1955             if (cfgv) {
1956                 char smallbuf[128];
1957                 STRLEN tmplen2 = len;
1958                 char *tmpbuf2;
1959                 GV *gv2;
1960
1961                 if (tmplen2 + 2 <= sizeof smallbuf)
1962                     tmpbuf2 = smallbuf;
1963                 else
1964                     Newx(tmpbuf2, tmplen2 + 2, char);
1965
1966                 tmpbuf2[0] = '_';
1967                 tmpbuf2[1] = '<';
1968
1969                 memcpy(tmpbuf2 + 2, s, tmplen2);
1970                 tmplen2 += 2;
1971
1972                 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1973                 if (!isGV(gv2)) {
1974                     gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1975                     /* adjust ${"::_<newfilename"} to store the new file name */
1976                     GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1977                     /* The line number may differ. If that is the case,
1978                        alias the saved lines that are in the array.
1979                        Otherwise alias the whole array. */
1980                     if (CopLINE(PL_curcop) == line_num) {
1981                         GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1982                         GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1983                     }
1984                     else if (GvAV(cfgv)) {
1985                         AV * const av = GvAV(cfgv);
1986                         const line_t start = CopLINE(PL_curcop)+1;
1987                         SSize_t items = AvFILLp(av) - start;
1988                         if (items > 0) {
1989                             AV * const av2 = GvAVn(gv2);
1990                             SV **svp = AvARRAY(av) + start;
1991                             Size_t l = line_num+1;
1992                             while (items-- && l < SSize_t_MAX && l == (line_t)l)
1993                                 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1994                         }
1995                     }
1996                 }
1997
1998                 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1999             }
2000         }
2001         CopFILE_free(PL_curcop);
2002         CopFILE_setn(PL_curcop, s, len);
2003     }
2004     CopLINE_set(PL_curcop, line_num);
2005 }
2006
2007 STATIC void
2008 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
2009 {
2010     AV *av = CopFILEAVx(PL_curcop);
2011     if (av) {
2012         SV * sv;
2013         if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
2014         else {
2015             sv = *av_fetch(av, 0, 1);
2016             SvUPGRADE(sv, SVt_PVMG);
2017         }
2018         if (!SvPOK(sv)) SvPVCLEAR(sv);
2019         if (orig_sv)
2020             sv_catsv(sv, orig_sv);
2021         else
2022             sv_catpvn(sv, buf, len);
2023         if (!SvIOK(sv)) {
2024             (void)SvIOK_on(sv);
2025             SvIV_set(sv, 0);
2026         }
2027         if (PL_parser->preambling == NOLINE)
2028             av_store(av, CopLINE(PL_curcop), sv);
2029     }
2030 }
2031
2032 /*
2033  * skipspace
2034  * Called to gobble the appropriate amount and type of whitespace.
2035  * Skips comments as well.
2036  * Returns the next character after the whitespace that is skipped.
2037  *
2038  * peekspace
2039  * Same thing, but look ahead without incrementing line numbers or
2040  * adjusting PL_linestart.
2041  */
2042
2043 #define skipspace(s) skipspace_flags(s, 0)
2044 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
2045
2046 char *
2047 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
2048 {
2049     PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
2050     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2051         while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
2052             s++;
2053     } else {
2054         STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
2055         PL_bufptr = s;
2056         lex_read_space(flags | LEX_KEEP_PREVIOUS |
2057                 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
2058                     LEX_NO_NEXT_CHUNK : 0));
2059         s = PL_bufptr;
2060         PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
2061         if (PL_linestart > PL_bufptr)
2062             PL_bufptr = PL_linestart;
2063         return s;
2064     }
2065     return s;
2066 }
2067
2068 /*
2069  * S_check_uni
2070  * Check the unary operators to ensure there's no ambiguity in how they're
2071  * used.  An ambiguous piece of code would be:
2072  *     rand + 5
2073  * This doesn't mean rand() + 5.  Because rand() is a unary operator,
2074  * the +5 is its argument.
2075  */
2076
2077 STATIC void
2078 S_check_uni(pTHX)
2079 {
2080     const char *s;
2081
2082     if (PL_oldoldbufptr != PL_last_uni)
2083         return;
2084     while (isSPACE(*PL_last_uni))
2085         PL_last_uni++;
2086     s = PL_last_uni;
2087     while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2088         s += UTF ? UTF8SKIP(s) : 1;
2089     if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
2090         return;
2091
2092     Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2093                      "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
2094                      UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2095 }
2096
2097 /*
2098  * LOP : macro to build a list operator.  Its behaviour has been replaced
2099  * with a subroutine, S_lop() for which LOP is just another name.
2100  */
2101
2102 #define LOP(f,x) return lop(f,x,s)
2103
2104 /*
2105  * S_lop
2106  * Build a list operator (or something that might be one).  The rules:
2107  *  - if we have a next token, then it's a list operator (no parens) for
2108  *    which the next token has already been parsed; e.g.,
2109  *       sort foo @args
2110  *       sort foo (@args)
2111  *  - if the next thing is an opening paren, then it's a function
2112  *  - else it's a list operator
2113  */
2114
2115 STATIC I32
2116 S_lop(pTHX_ I32 f, U8 x, char *s)
2117 {
2118     PERL_ARGS_ASSERT_LOP;
2119
2120     pl_yylval.ival = f;
2121     CLINE;
2122     PL_bufptr = s;
2123     PL_last_lop = PL_oldbufptr;
2124     PL_last_lop_op = (OPCODE)f;
2125     if (PL_nexttoke)
2126         goto lstop;
2127     PL_expect = x;
2128     if (*s == '(')
2129         return REPORT(FUNC);
2130     s = skipspace(s);
2131     if (*s == '(')
2132         return REPORT(FUNC);
2133     else {
2134         lstop:
2135         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2136             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2137         return REPORT(LSTOP);
2138     }
2139 }
2140
2141 /*
2142  * S_force_next
2143  * When the lexer realizes it knows the next token (for instance,
2144  * it is reordering tokens for the parser) then it can call S_force_next
2145  * to know what token to return the next time the lexer is called.  Caller
2146  * will need to set PL_nextval[] and possibly PL_expect to ensure
2147  * the lexer handles the token correctly.
2148  */
2149
2150 STATIC void
2151 S_force_next(pTHX_ I32 type)
2152 {
2153 #ifdef DEBUGGING
2154     if (DEBUG_T_TEST) {
2155         PerlIO_printf(Perl_debug_log, "### forced token:\n");
2156         tokereport(type, &NEXTVAL_NEXTTOKE);
2157     }
2158 #endif
2159     assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2160     PL_nexttype[PL_nexttoke] = type;
2161     PL_nexttoke++;
2162 }
2163
2164 /*
2165  * S_postderef
2166  *
2167  * This subroutine handles postfix deref syntax after the arrow has already
2168  * been emitted.  @* $* etc. are emitted as two separate tokens right here.
2169  * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2170  * only the first, leaving yylex to find the next.
2171  */
2172
2173 static int
2174 S_postderef(pTHX_ int const funny, char const next)
2175 {
2176     assert(funny == DOLSHARP
2177         || funny == PERLY_DOLLAR
2178         || funny == PERLY_SNAIL
2179         || funny == PERLY_PERCENT_SIGN
2180         || funny == PERLY_AMPERSAND
2181         || funny == PERLY_STAR
2182     );
2183     if (next == '*') {
2184         PL_expect = XOPERATOR;
2185         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2186             assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
2187             PL_lex_state = LEX_INTERPEND;
2188             if (PERLY_SNAIL == funny)
2189                 force_next(POSTJOIN);
2190         }
2191         force_next(PERLY_STAR);
2192         PL_bufptr+=2;
2193     }
2194     else {
2195         if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2196          && !PL_lex_brackets)
2197             PL_lex_dojoin = 2;
2198         PL_expect = XOPERATOR;
2199         PL_bufptr++;
2200     }
2201     return funny;
2202 }
2203
2204 void
2205 Perl_yyunlex(pTHX)
2206 {
2207     int yyc = PL_parser->yychar;
2208     if (yyc != YYEMPTY) {
2209         if (yyc) {
2210             NEXTVAL_NEXTTOKE = PL_parser->yylval;
2211             if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2212                 PL_lex_allbrackets--;
2213                 PL_lex_brackets--;
2214                 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2215             } else if (yyc == PERLY_PAREN_OPEN) {
2216                 PL_lex_allbrackets--;
2217                 yyc |= (2<<24);
2218             }
2219             force_next(yyc);
2220         }
2221         PL_parser->yychar = YYEMPTY;
2222     }
2223 }
2224
2225 STATIC SV *
2226 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2227 {
2228     SV * const sv = newSVpvn_utf8(start, len,
2229                     ! IN_BYTES
2230                   &&  UTF
2231                   &&  len != 0
2232                   &&  is_utf8_non_invariant_string((const U8*)start, len));
2233     return sv;
2234 }
2235
2236 /*
2237  * S_force_word
2238  * When the lexer knows the next thing is a word (for instance, it has
2239  * just seen -> and it knows that the next char is a word char, then
2240  * it calls S_force_word to stick the next word into the PL_nexttoke/val
2241  * lookahead.
2242  *
2243  * Arguments:
2244  *   char *start : buffer position (must be within PL_linestr)
2245  *   int token   : PL_next* will be this type of bare word
2246  *                 (e.g., METHCALL0,BAREWORD)
2247  *   int check_keyword : if true, Perl checks to make sure the word isn't
2248  *       a keyword (do this if the word is a label, e.g. goto FOO)
2249  *   int allow_pack : if true, : characters will also be allowed (require,
2250  *       use, etc. do this)
2251  */
2252
2253 STATIC char *
2254 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2255 {
2256     char *s;
2257     STRLEN len;
2258
2259     PERL_ARGS_ASSERT_FORCE_WORD;
2260
2261     start = skipspace(start);
2262     s = start;
2263     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2264         || (allow_pack && *s == ':' && s[1] == ':') )
2265     {
2266         s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len, allow_pack);
2267         if (check_keyword) {
2268           char *s2 = PL_tokenbuf;
2269           STRLEN len2 = len;
2270           if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2271             s2 += sizeof("CORE::") - 1;
2272             len2 -= sizeof("CORE::") - 1;
2273           }
2274           if (keyword(s2, len2, 0))
2275             return start;
2276         }
2277         if (token == METHCALL0) {
2278             s = skipspace(s);
2279             if (*s == '(')
2280                 PL_expect = XTERM;
2281             else {
2282                 PL_expect = XOPERATOR;
2283             }
2284         }
2285         NEXTVAL_NEXTTOKE.opval
2286             = newSVOP(OP_CONST,0,
2287                            S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2288         NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2289         force_next(token);
2290     }
2291     return s;
2292 }
2293
2294 /*
2295  * S_force_ident
2296  * Called when the lexer wants $foo *foo &foo etc, but the program
2297  * text only contains the "foo" portion.  The first argument is a pointer
2298  * to the "foo", and the second argument is the type symbol to prefix.
2299  * Forces the next token to be a "BAREWORD".
2300  * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2301  */
2302
2303 STATIC void
2304 S_force_ident(pTHX_ const char *s, int kind)
2305 {
2306     PERL_ARGS_ASSERT_FORCE_IDENT;
2307
2308     if (s[0]) {
2309         const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2310         OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2311                                                                 UTF ? SVf_UTF8 : 0));
2312         NEXTVAL_NEXTTOKE.opval = o;
2313         force_next(BAREWORD);
2314         if (kind) {
2315             o->op_private = OPpCONST_ENTERED;
2316             /* XXX see note in pp_entereval() for why we forgo typo
2317                warnings if the symbol must be introduced in an eval.
2318                GSAR 96-10-12 */
2319             gv_fetchpvn_flags(s, len,
2320                               (PL_in_eval ? GV_ADDMULTI
2321                               : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2322                               kind == PERLY_DOLLAR ? SVt_PV :
2323                               kind == PERLY_SNAIL ? SVt_PVAV :
2324                               kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
2325                               SVt_PVGV
2326                               );
2327         }
2328     }
2329 }
2330
2331 static void
2332 S_force_ident_maybe_lex(pTHX_ char pit)
2333 {
2334     NEXTVAL_NEXTTOKE.ival = pit;
2335     force_next('p');
2336 }
2337
2338 NV
2339 Perl_str_to_version(pTHX_ SV *sv)
2340 {
2341     NV retval = 0.0;
2342     NV nshift = 1.0;
2343     STRLEN len;
2344     const char *start = SvPV_const(sv,len);
2345     const char * const end = start + len;
2346     const bool utf = cBOOL(SvUTF8(sv));
2347
2348     PERL_ARGS_ASSERT_STR_TO_VERSION;
2349
2350     while (start < end) {
2351         STRLEN skip;
2352         UV n;
2353         if (utf)
2354             n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2355         else {
2356             n = *(U8*)start;
2357             skip = 1;
2358         }
2359         retval += ((NV)n)/nshift;
2360         start += skip;
2361         nshift *= 1000;
2362     }
2363     return retval;
2364 }
2365
2366 /*
2367  * S_force_version
2368  * Forces the next token to be a version number.
2369  * If the next token appears to be an invalid version number, (e.g. "v2b"),
2370  * and if "guessing" is TRUE, then no new token is created (and the caller
2371  * must use an alternative parsing method).
2372  */
2373
2374 STATIC char *
2375 S_force_version(pTHX_ char *s, int guessing)
2376 {
2377     OP *version = NULL;
2378     char *d;
2379
2380     PERL_ARGS_ASSERT_FORCE_VERSION;
2381
2382     s = skipspace(s);
2383
2384     d = s;
2385     if (*d == 'v')
2386         d++;
2387     if (isDIGIT(*d)) {
2388         while (isDIGIT(*d) || *d == '_' || *d == '.')
2389             d++;
2390         if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2391             SV *ver;
2392             s = scan_num(s, &pl_yylval);
2393             version = pl_yylval.opval;
2394             ver = cSVOPx(version)->op_sv;
2395             if (SvPOK(ver) && !SvNIOK(ver)) {
2396                 SvUPGRADE(ver, SVt_PVNV);
2397                 SvNV_set(ver, str_to_version(ver));
2398                 SvNOK_on(ver);          /* hint that it is a version */
2399             }
2400         }
2401         else if (guessing) {
2402             return s;
2403         }
2404     }
2405
2406     /* NOTE: The parser sees the package name and the VERSION swapped */
2407     NEXTVAL_NEXTTOKE.opval = version;
2408     force_next(BAREWORD);
2409
2410     return s;
2411 }
2412
2413 /*
2414  * S_force_strict_version
2415  * Forces the next token to be a version number using strict syntax rules.
2416  */
2417
2418 STATIC char *
2419 S_force_strict_version(pTHX_ char *s)
2420 {
2421     OP *version = NULL;
2422     const char *errstr = NULL;
2423
2424     PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2425
2426     while (isSPACE(*s)) /* leading whitespace */
2427         s++;
2428
2429     if (is_STRICT_VERSION(s,&errstr)) {
2430         SV *ver = newSV_type(SVt_NULL);
2431         s = (char *)scan_version(s, ver, 0);
2432         version = newSVOP(OP_CONST, 0, ver);
2433     }
2434     else if ((*s != ';' && *s != ':' && *s != '{' && *s != '}' )
2435              && (s = skipspace(s), (*s != ';' && *s != ':' && *s != '{' && *s != '}' )))
2436     {
2437         PL_bufptr = s;
2438         if (errstr)
2439             yyerror(errstr); /* version required */
2440         return s;
2441     }
2442
2443     /* NOTE: The parser sees the package name and the VERSION swapped */
2444     NEXTVAL_NEXTTOKE.opval = version;
2445     force_next(BAREWORD);
2446
2447     return s;
2448 }
2449
2450 /*
2451  * S_tokeq
2452  * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2453  * modified as necessary.  However, if HINT_NEW_STRING is on, 'sv' is
2454  * unchanged, and a new SV containing the modified input is returned.
2455  */
2456
2457 STATIC SV *
2458 S_tokeq(pTHX_ SV *sv)
2459 {
2460     char *s;
2461     char *send;
2462     char *d;
2463     SV *pv = sv;
2464
2465     PERL_ARGS_ASSERT_TOKEQ;
2466
2467     assert (SvPOK(sv));
2468     assert (SvLEN(sv));
2469     assert (!SvIsCOW(sv));
2470     if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2471         goto finish;
2472     s = SvPVX(sv);
2473     send = SvEND(sv);
2474     /* This is relying on the SV being "well formed" with a trailing '\0'  */
2475     while (s < send && !(*s == '\\' && s[1] == '\\'))
2476         s++;
2477     if (s == send)
2478         goto finish;
2479     d = s;
2480     if ( PL_hints & HINT_NEW_STRING ) {
2481         pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2482                             SVs_TEMP | SvUTF8(sv));
2483     }
2484     while (s < send) {
2485         if (*s == '\\') {
2486             if (s + 1 < send && (s[1] == '\\'))
2487                 s++;            /* all that, just for this */
2488         }
2489         *d++ = *s++;
2490     }
2491     *d = '\0';
2492     SvCUR_set(sv, d - SvPVX_const(sv));
2493   finish:
2494     if ( PL_hints & HINT_NEW_STRING )
2495        return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2496     return sv;
2497 }
2498
2499 /*
2500  * Now come three functions related to double-quote context,
2501  * S_sublex_start, S_sublex_push, and S_sublex_done.  They're used when
2502  * converting things like "\u\Lgnat" into ucfirst(lc("gnat")).  They
2503  * interact with PL_lex_state, and create fake ( ... ) argument lists
2504  * to handle functions and concatenation.
2505  * For example,
2506  *   "foo\lbar"
2507  * is tokenised as
2508  *    stringify ( const[foo] concat lcfirst ( const[bar] ) )
2509  */
2510
2511 /*
2512  * S_sublex_start
2513  * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2514  *
2515  * Pattern matching will set PL_lex_op to the pattern-matching op to
2516  * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2517  *
2518  * OP_CONST is easy--just make the new op and return.
2519  *
2520  * Everything else becomes a FUNC.
2521  *
2522  * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2523  * had an OP_CONST.  This just sets us up for a
2524  * call to S_sublex_push().
2525  */
2526
2527 STATIC I32
2528 S_sublex_start(pTHX)
2529 {
2530     const I32 op_type = pl_yylval.ival;
2531
2532     if (op_type == OP_NULL) {
2533         pl_yylval.opval = PL_lex_op;
2534         PL_lex_op = NULL;
2535         return THING;
2536     }
2537     if (op_type == OP_CONST) {
2538         SV *sv = PL_lex_stuff;
2539         PL_lex_stuff = NULL;
2540         sv = tokeq(sv);
2541
2542         if (SvTYPE(sv) == SVt_PVIV) {
2543             /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2544             STRLEN len;
2545             const char * const p = SvPV_const(sv, len);
2546             SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2547             SvREFCNT_dec(sv);
2548             sv = nsv;
2549         }
2550         pl_yylval.opval = newSVOP(op_type, 0, sv);
2551         return THING;
2552     }
2553
2554     PL_parser->lex_super_state = PL_lex_state;
2555     PL_parser->lex_sub_inwhat = (U16)op_type;
2556     PL_parser->lex_sub_op = PL_lex_op;
2557     PL_parser->sub_no_recover = FALSE;
2558     PL_parser->sub_error_count = PL_error_count;
2559     PL_lex_state = LEX_INTERPPUSH;
2560
2561     PL_expect = XTERM;
2562     if (PL_lex_op) {
2563         pl_yylval.opval = PL_lex_op;
2564         PL_lex_op = NULL;
2565         return PMFUNC;
2566     }
2567     else
2568         return FUNC;
2569 }
2570
2571 /*
2572  * S_sublex_push
2573  * Create a new scope to save the lexing state.  The scope will be
2574  * ended in S_sublex_done.  Returns a '(', starting the function arguments
2575  * to the uc, lc, etc. found before.
2576  * Sets PL_lex_state to LEX_INTERPCONCAT.
2577  */
2578
2579 STATIC I32
2580 S_sublex_push(pTHX)
2581 {
2582     LEXSHARED *shared;
2583     const bool is_heredoc = PL_multi_close == '<';
2584     ENTER;
2585
2586     PL_lex_state = PL_parser->lex_super_state;
2587     SAVEI8(PL_lex_dojoin);
2588     SAVEI32(PL_lex_brackets);
2589     SAVEI32(PL_lex_allbrackets);
2590     SAVEI32(PL_lex_formbrack);
2591     SAVEI8(PL_lex_fakeeof);
2592     SAVEI32(PL_lex_casemods);
2593     SAVEI32(PL_lex_starts);
2594     SAVEI8(PL_lex_state);
2595     SAVESPTR(PL_lex_repl);
2596     SAVEVPTR(PL_lex_inpat);
2597     SAVEI16(PL_lex_inwhat);
2598     if (is_heredoc)
2599     {
2600         SAVECOPLINE(PL_curcop);
2601         SAVEI32(PL_multi_end);
2602         SAVEI32(PL_parser->herelines);
2603         PL_parser->herelines = 0;
2604     }
2605     SAVEIV(PL_multi_close);
2606     SAVEPPTR(PL_bufptr);
2607     SAVEPPTR(PL_bufend);
2608     SAVEPPTR(PL_oldbufptr);
2609     SAVEPPTR(PL_oldoldbufptr);
2610     SAVEPPTR(PL_last_lop);
2611     SAVEPPTR(PL_last_uni);
2612     SAVEPPTR(PL_linestart);
2613     SAVESPTR(PL_linestr);
2614     SAVEGENERICPV(PL_lex_brackstack);
2615     SAVEGENERICPV(PL_lex_casestack);
2616     SAVEGENERICPV(PL_parser->lex_shared);
2617     SAVEBOOL(PL_parser->lex_re_reparsing);
2618     SAVEI32(PL_copline);
2619
2620     /* The here-doc parser needs to be able to peek into outer lexing
2621        scopes to find the body of the here-doc.  So we put PL_linestr and
2622        PL_bufptr into lex_shared, to 'share' those values.
2623      */
2624     PL_parser->lex_shared->ls_linestr = PL_linestr;
2625     PL_parser->lex_shared->ls_bufptr  = PL_bufptr;
2626
2627     PL_linestr = PL_lex_stuff;
2628     PL_lex_repl = PL_parser->lex_sub_repl;
2629     PL_lex_stuff = NULL;
2630     PL_parser->lex_sub_repl = NULL;
2631
2632     /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2633        set for an inner quote-like operator and then an error causes scope-
2634        popping.  We must not have a PL_lex_stuff value left dangling, as
2635        that breaks assumptions elsewhere.  See bug #123617.  */
2636     SAVEGENERICSV(PL_lex_stuff);
2637     SAVEGENERICSV(PL_parser->lex_sub_repl);
2638
2639     PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2640         = SvPVX(PL_linestr);
2641     PL_bufend += SvCUR(PL_linestr);
2642     PL_last_lop = PL_last_uni = NULL;
2643     SAVEFREESV(PL_linestr);
2644     if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2645
2646     PL_lex_dojoin = FALSE;
2647     PL_lex_brackets = PL_lex_formbrack = 0;
2648     PL_lex_allbrackets = 0;
2649     PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2650     Newx(PL_lex_brackstack, 120, char);
2651     Newx(PL_lex_casestack, 12, char);
2652     PL_lex_casemods = 0;
2653     *PL_lex_casestack = '\0';
2654     PL_lex_starts = 0;
2655     PL_lex_state = LEX_INTERPCONCAT;
2656     if (is_heredoc)
2657         CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2658     PL_copline = NOLINE;
2659
2660     Newxz(shared, 1, LEXSHARED);
2661     shared->ls_prev = PL_parser->lex_shared;
2662     PL_parser->lex_shared = shared;
2663
2664     PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2665     if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2666     if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2667         PL_lex_inpat = PL_parser->lex_sub_op;
2668     else
2669         PL_lex_inpat = NULL;
2670
2671     PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2672     PL_in_eval &= ~EVAL_RE_REPARSING;
2673
2674     return SUBLEXSTART;
2675 }
2676
2677 /*
2678  * S_sublex_done
2679  * Restores lexer state after a S_sublex_push.
2680  */
2681
2682 STATIC I32
2683 S_sublex_done(pTHX)
2684 {
2685     if (!PL_lex_starts++) {
2686         SV * const sv = newSVpvs("");
2687         if (SvUTF8(PL_linestr))
2688             SvUTF8_on(sv);
2689         PL_expect = XOPERATOR;
2690         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2691         return THING;
2692     }
2693
2694     if (PL_lex_casemods) {              /* oops, we've got some unbalanced parens */
2695         PL_lex_state = LEX_INTERPCASEMOD;
2696         return yylex();
2697     }
2698
2699     /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2700     assert(PL_lex_inwhat != OP_TRANSR);
2701     if (PL_lex_repl) {
2702         assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2703         PL_linestr = PL_lex_repl;
2704         PL_lex_inpat = 0;
2705         PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2706         PL_bufend += SvCUR(PL_linestr);
2707         PL_last_lop = PL_last_uni = NULL;
2708         PL_lex_dojoin = FALSE;
2709         PL_lex_brackets = 0;
2710         PL_lex_allbrackets = 0;
2711         PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2712         PL_lex_casemods = 0;
2713         *PL_lex_casestack = '\0';
2714         PL_lex_starts = 0;
2715         if (SvEVALED(PL_lex_repl)) {
2716             PL_lex_state = LEX_INTERPNORMAL;
2717             PL_lex_starts++;
2718             /*  we don't clear PL_lex_repl here, so that we can check later
2719                 whether this is an evalled subst; that means we rely on the
2720                 logic to ensure sublex_done() is called again only via the
2721                 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2722         }
2723         else {
2724             PL_lex_state = LEX_INTERPCONCAT;
2725             PL_lex_repl = NULL;
2726         }
2727         if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2728             CopLINE(PL_curcop) +=
2729                 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2730                  + PL_parser->herelines;
2731             PL_parser->herelines = 0;
2732         }
2733         return PERLY_SLASH;
2734     }
2735     else {
2736         const line_t l = CopLINE(PL_curcop);
2737         LEAVE;
2738         if (PL_parser->sub_error_count != PL_error_count) {
2739             if (PL_parser->sub_no_recover) {
2740                 yyquit();
2741                 NOT_REACHED;
2742             }
2743         }
2744         if (PL_multi_close == '<')
2745             PL_parser->herelines += l - PL_multi_end;
2746         PL_bufend = SvPVX(PL_linestr);
2747         PL_bufend += SvCUR(PL_linestr);
2748         PL_expect = XOPERATOR;
2749         return SUBLEXEND;
2750     }
2751 }
2752
2753 HV *
2754 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2755                           const STRLEN context_len, const char ** error_msg)
2756 {
2757     /* Load the official _charnames module if not already there.  The
2758      * parameters are just to give info for any error messages generated:
2759      *  char_name   a name to look up which is the reason for loading this
2760      *  context     'char_name' in the context in the input in which it appears
2761      *  context_len how many bytes 'context' occupies
2762      *  error_msg   *error_msg will be set to any error
2763      *
2764      *  Returns the ^H table if success; otherwise NULL */
2765
2766     unsigned int i;
2767     HV * table;
2768     SV **cvp;
2769     SV * res;
2770
2771     PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2772
2773     /* This loop is executed 1 1/2 times.  On the first time through, if it
2774      * isn't already loaded, try loading it, and iterate just once to see if it
2775      * worked.  */
2776     for (i = 0; i < 2; i++) {
2777         table = GvHV(PL_hintgv);                 /* ^H */
2778
2779         if (    table
2780             && (PL_hints & HINT_LOCALIZE_HH)
2781             && (cvp = hv_fetchs(table, "charnames", FALSE))
2782             &&  SvOK(*cvp))
2783         {
2784             return table;   /* Quit if already loaded */
2785         }
2786
2787         if (i == 0) {
2788             Perl_load_module(aTHX_
2789                 0,
2790                 newSVpvs("_charnames"),
2791
2792                 /* version parameter; no need to specify it, as if we get too early
2793                 * a version, will fail anyway, not being able to find 'charnames'
2794                 * */
2795                 NULL,
2796                 newSVpvs(":full"),
2797                 newSVpvs(":short"),
2798                 NULL);
2799         }
2800     }
2801
2802     /* Here, it failed; new_constant will give appropriate error messages */
2803     *error_msg = NULL;
2804     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2805                         context, context_len, error_msg);
2806     SvREFCNT_dec(res);
2807
2808     return NULL;
2809 }
2810
2811 STATIC SV*
2812 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2813 {
2814     /* This justs wraps get_and_check_backslash_N_name() to output any error
2815      * message it returns. */
2816
2817     const char * error_msg = NULL;
2818     SV * result;
2819
2820     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2821
2822     /* charnames doesn't work well if there have been errors found */
2823     if (PL_error_count > 0) {
2824         return NULL;
2825     }
2826
2827     result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2828
2829     if (error_msg) {
2830         yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2831     }
2832
2833     return result;
2834 }
2835
2836 SV*
2837 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2838                                           const char* e,
2839                                           const bool is_utf8,
2840                                           const char ** error_msg)
2841 {
2842     /* <s> points to first character of interior of \N{}, <e> to one beyond the
2843      * interior, hence to the "}".  Finds what the name resolves to, returning
2844      * an SV* containing it; NULL if no valid one found.
2845      *
2846      * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2847      * doesn't have to be. */
2848
2849     SV* char_name;
2850     SV* res;
2851     HV * table;
2852     SV **cvp;
2853     SV *cv;
2854     SV *rv;
2855     HV *stash;
2856
2857     /* Points to the beginning of the \N{... so that any messages include the
2858      * context of what's failing*/
2859     const char* context = s - 3;
2860     STRLEN context_len = e - context + 1; /* include all of \N{...} */
2861
2862
2863     PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2864
2865     assert(e >= s);
2866     assert(s > (char *) 3);
2867
2868     while (s < e && isBLANK(*s)) {
2869         s++;
2870     }
2871
2872     while (s < e && isBLANK(*(e - 1))) {
2873         e--;
2874     }
2875
2876     char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2877
2878     if (!SvCUR(char_name)) {
2879         SvREFCNT_dec_NN(char_name);
2880         /* diag_listed_as: Unknown charname '%s' */
2881         *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2882         return NULL;
2883     }
2884
2885     /* Autoload the charnames module */
2886
2887     table = load_charnames(char_name, context, context_len, error_msg);
2888     if (table == NULL) {
2889         return NULL;
2890     }
2891
2892     *error_msg = NULL;
2893     res = new_constant( NULL, 0, "charnames", char_name, NULL,
2894                         context, context_len, error_msg);
2895     if (*error_msg) {
2896         *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2897
2898         SvREFCNT_dec(res);
2899         return NULL;
2900     }
2901
2902     /* See if the charnames handler is the Perl core's, and if so, we can skip
2903      * the validation needed for a user-supplied one, as Perl's does its own
2904      * validation. */
2905     cvp = hv_fetchs(table, "charnames", FALSE);
2906     if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2907         SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2908     {
2909         const char * const name = HvNAME(stash);
2910          if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2911            return res;
2912        }
2913     }
2914
2915     /* Here, it isn't Perl's charname handler.  We can't rely on a
2916      * user-supplied handler to validate the input name.  For non-ut8 input,
2917      * look to see that the first character is legal.  Then loop through the
2918      * rest checking that each is a continuation */
2919
2920     /* This code makes the reasonable assumption that the only Latin1-range
2921      * characters that begin a character name alias are alphabetic, otherwise
2922      * would have to create a isCHARNAME_BEGIN macro */
2923
2924     if (! is_utf8) {
2925         if (! isALPHAU(*s)) {
2926             goto bad_charname;
2927         }
2928         s++;
2929         while (s < e) {
2930             if (! isCHARNAME_CONT(*s)) {
2931                 goto bad_charname;
2932             }
2933             if (*s == ' ' && *(s-1) == ' ') {
2934                 goto multi_spaces;
2935             }
2936             s++;
2937         }
2938     }
2939     else {
2940         /* Similarly for utf8.  For invariants can check directly; for other
2941          * Latin1, can calculate their code point and check; otherwise  use an
2942          * inversion list */
2943         if (UTF8_IS_INVARIANT(*s)) {
2944             if (! isALPHAU(*s)) {
2945                 goto bad_charname;
2946             }
2947             s++;
2948         } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2949             if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2950                 goto bad_charname;
2951             }
2952             s += 2;
2953         }
2954         else {
2955             if (! _invlist_contains_cp(PL_utf8_charname_begin,
2956                                        utf8_to_uvchr_buf((U8 *) s,
2957                                                          (U8 *) e,
2958                                                          NULL)))
2959             {
2960                 goto bad_charname;
2961             }
2962             s += UTF8SKIP(s);
2963         }
2964
2965         while (s < e) {
2966             if (UTF8_IS_INVARIANT(*s)) {
2967                 if (! isCHARNAME_CONT(*s)) {
2968                     goto bad_charname;
2969                 }
2970                 if (*s == ' ' && *(s-1) == ' ') {
2971                     goto multi_spaces;
2972                 }
2973                 s++;
2974             }
2975             else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2976                 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2977                 {
2978                     goto bad_charname;
2979                 }
2980                 s += 2;
2981             }
2982             else {
2983                 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2984                                            utf8_to_uvchr_buf((U8 *) s,
2985                                                              (U8 *) e,
2986                                                              NULL)))
2987                 {
2988                     goto bad_charname;
2989                 }
2990                 s += UTF8SKIP(s);
2991             }
2992         }
2993     }
2994     if (*(s-1) == ' ') {
2995         /* diag_listed_as: charnames alias definitions may not contain
2996                            trailing white-space; marked by <-- HERE in %s
2997          */
2998         *error_msg = Perl_form(aTHX_
2999             "charnames alias definitions may not contain trailing "
3000             "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
3001             (int)(s - context + 1), context,
3002             (int)(e - s + 1), s + 1);
3003         return NULL;
3004     }
3005
3006     if (SvUTF8(res)) { /* Don't accept malformed charname value */
3007         const U8* first_bad_char_loc;
3008         STRLEN len;
3009         const char* const str = SvPV_const(res, len);
3010         if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
3011                                           &first_bad_char_loc)))
3012         {
3013             _force_out_malformed_utf8_message(first_bad_char_loc,
3014                                               (U8 *) PL_parser->bufend,
3015                                               0,
3016                                               0 /* 0 means don't die */ );
3017             /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
3018                                immediately after '%s' */
3019             *error_msg = Perl_form(aTHX_
3020                 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
3021                  (int) context_len, context,
3022                  (int) ((char *) first_bad_char_loc - str), str);
3023             return NULL;
3024         }
3025     }
3026
3027     return res;
3028
3029   bad_charname: {
3030
3031         /* The final %.*s makes sure that should the trailing NUL be missing
3032          * that this print won't run off the end of the string */
3033         /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
3034                            in \N{%s} */
3035         *error_msg = Perl_form(aTHX_
3036             "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3037             (int)(s - context + 1), context,
3038             (int)(e - s + 1), s + 1);
3039         return NULL;
3040     }
3041
3042   multi_spaces:
3043         /* diag_listed_as: charnames alias definitions may not contain a
3044                            sequence of multiple spaces; marked by <-- HERE
3045                            in %s */
3046         *error_msg = Perl_form(aTHX_
3047             "charnames alias definitions may not contain a sequence of "
3048             "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3049             (int)(s - context + 1), context,
3050             (int)(e - s + 1), s + 1);
3051         return NULL;
3052 }
3053
3054 /*
3055   scan_const
3056
3057   Extracts the next constant part of a pattern, double-quoted string,
3058   or transliteration.  This is terrifying code.
3059
3060   For example, in parsing the double-quoted string "ab\x63$d", it would
3061   stop at the '$' and return an OP_CONST containing 'abc'.
3062
3063   It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3064   processing a pattern (PL_lex_inpat is true), a transliteration
3065   (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3066
3067   Returns a pointer to the character scanned up to. If this is
3068   advanced from the start pointer supplied (i.e. if anything was
3069   successfully parsed), will leave an OP_CONST for the substring scanned
3070   in pl_yylval. Caller must intuit reason for not parsing further
3071   by looking at the next characters herself.
3072
3073   In patterns:
3074     expand:
3075       \N{FOO}  => \N{U+hex_for_character_FOO}
3076       (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3077
3078     pass through:
3079         all other \-char, including \N and \N{ apart from \N{ABC}
3080
3081     stops on:
3082         @ and $ where it appears to be a var, but not for $ as tail anchor
3083         \l \L \u \U \Q \E
3084         (?{  or  (??{ or (*{
3085
3086   In transliterations:
3087     characters are VERY literal, except for - not at the start or end
3088     of the string, which indicates a range.  However some backslash sequences
3089     are recognized: \r, \n, and the like
3090                     \007 \o{}, \x{}, \N{}
3091     If all elements in the transliteration are below 256,
3092     scan_const expands the range to the full set of intermediate
3093     characters. If the range is in utf8, the hyphen is replaced with
3094     a certain range mark which will be handled by pmtrans() in op.c.
3095
3096   In double-quoted strings:
3097     backslashes:
3098       all those recognized in transliterations
3099       deprecated backrefs: \1 (in substitution replacements)
3100       case and quoting: \U \Q \E
3101     stops on @ and $
3102
3103   scan_const does *not* construct ops to handle interpolated strings.
3104   It stops processing as soon as it finds an embedded $ or @ variable
3105   and leaves it to the caller to work out what's going on.
3106
3107   embedded arrays (whether in pattern or not) could be:
3108       @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3109
3110   $ in double-quoted strings must be the symbol of an embedded scalar.
3111
3112   $ in pattern could be $foo or could be tail anchor.  Assumption:
3113   it's a tail anchor if $ is the last thing in the string, or if it's
3114   followed by one of "()| \r\n\t"
3115
3116   \1 (backreferences) are turned into $1 in substitutions
3117
3118   The structure of the code is
3119       while (there's a character to process) {
3120           handle transliteration ranges
3121           skip regexp comments /(?#comment)/ and codes /(?{code})/ ((*{code})/
3122           skip #-initiated comments in //x patterns
3123           check for embedded arrays
3124           check for embedded scalars
3125           if (backslash) {
3126               deprecate \1 in substitution replacements
3127               handle string-changing backslashes \l \U \Q \E, etc.
3128               switch (what was escaped) {
3129                   handle \- in a transliteration (becomes a literal -)
3130                   if a pattern and not \N{, go treat as regular character
3131                   handle \132 (octal characters)
3132                   handle \x15 and \x{1234} (hex characters)
3133                   handle \N{name} (named characters, also \N{3,5} in a pattern)
3134                   handle \cV (control characters)
3135                   handle printf-style backslashes (\f, \r, \n, etc)
3136               } (end switch)
3137               continue
3138           } (end if backslash)
3139           handle regular character
3140     } (end while character to read)
3141
3142 */
3143
3144 STATIC char *
3145 S_scan_const(pTHX_ char *start)
3146 {
3147     const char * const send = PL_bufend;/* end of the constant */
3148     SV *sv = newSV(send - start);       /* sv for the constant.  See note below
3149                                            on sizing. */
3150     char *s = start;                    /* start of the constant */
3151     char *d = SvPVX(sv);                /* destination for copies */
3152     bool dorange = FALSE;               /* are we in a translit range? */
3153     bool didrange = FALSE;              /* did we just finish a range? */
3154     bool in_charclass = FALSE;          /* within /[...]/ */
3155     const bool s_is_utf8 = cBOOL(UTF);  /* Is the source string assumed to be
3156                                            UTF8?  But, this can show as true
3157                                            when the source isn't utf8, as for
3158                                            example when it is entirely composed
3159                                            of hex constants */
3160     bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
3161     STRLEN utf8_variant_count = 0;      /* When not in UTF-8, this counts the
3162                                            number of characters found so far
3163                                            that will expand (into 2 bytes)
3164                                            should we have to convert to
3165                                            UTF-8) */
3166     SV *res;                            /* result from charnames */
3167     STRLEN offset_to_max = 0;   /* The offset in the output to where the range
3168                                    high-end character is temporarily placed */
3169
3170     /* Does something require special handling in tr/// ?  This avoids extra
3171      * work in a less likely case.  As such, khw didn't feel it was worth
3172      * adding any branches to the more mainline code to handle this, which
3173      * means that this doesn't get set in some circumstances when things like
3174      * \x{100} get expanded out.  As a result there needs to be extra testing
3175      * done in the tr code */
3176     bool has_above_latin1 = FALSE;
3177
3178     /* Note on sizing:  The scanned constant is placed into sv, which is
3179      * initialized by newSV() assuming one byte of output for every byte of
3180      * input.  This routine expects newSV() to allocate an extra byte for a
3181      * trailing NUL, which this routine will append if it gets to the end of
3182      * the input.  There may be more bytes of input than output (eg., \N{LATIN
3183      * CAPITAL LETTER A}), or more output than input if the constant ends up
3184      * recoded to utf8, but each time a construct is found that might increase
3185      * the needed size, SvGROW() is called.  Its size parameter each time is
3186      * based on the best guess estimate at the time, namely the length used so
3187      * far, plus the length the current construct will occupy, plus room for
3188      * the trailing NUL, plus one byte for every input byte still unscanned */
3189
3190     UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3191                        before set */
3192 #ifdef EBCDIC
3193     int backslash_N = 0;            /* ? was the character from \N{} */
3194     int non_portable_endpoint = 0;  /* ? In a range is an endpoint
3195                                        platform-specific like \x65 */
3196 #endif
3197
3198     PERL_ARGS_ASSERT_SCAN_CONST;
3199
3200     assert(PL_lex_inwhat != OP_TRANSR);
3201
3202     /* Protect sv from errors and fatal warnings. */
3203     ENTER_with_name("scan_const");
3204     SAVEFREESV(sv);
3205
3206     /* A bunch of code in the loop below assumes that if s[n] exists and is not
3207      * NUL, then s[n+1] exists.  This assertion makes sure that assumption is
3208      * valid */
3209     assert(*send == '\0');
3210
3211     while (s < send
3212            || dorange   /* Handle tr/// range at right edge of input */
3213     ) {
3214
3215         /* get transliterations out of the way (they're most literal) */
3216         if (PL_lex_inwhat == OP_TRANS) {
3217
3218             /* But there isn't any special handling necessary unless there is a
3219              * range, so for most cases we just drop down and handle the value
3220              * as any other.  There are two exceptions.
3221              *
3222              * 1.  A hyphen indicates that we are actually going to have a
3223              *     range.  In this case, skip the '-', set a flag, then drop
3224              *     down to handle what should be the end range value.
3225              * 2.  After we've handled that value, the next time through, that
3226              *     flag is set and we fix up the range.
3227              *
3228              * Ranges entirely within Latin1 are expanded out entirely, in
3229              * order to make the transliteration a simple table look-up.
3230              * Ranges that extend above Latin1 have to be done differently, so
3231              * there is no advantage to expanding them here, so they are
3232              * stored here as Min, RANGE_INDICATOR, Max.  'RANGE_INDICATOR' is
3233              * a byte that can't occur in legal UTF-8, and hence can signify a
3234              * hyphen without any possible ambiguity.  On EBCDIC machines, if
3235              * the range is expressed as Unicode, the Latin1 portion is
3236              * expanded out even if the range extends above Latin1.  This is
3237              * because each code point in it has to be processed here
3238              * individually to get its native translation */
3239
3240             if (! dorange) {
3241
3242                 /* Here, we don't think we're in a range.  If the new character
3243                  * is not a hyphen; or if it is a hyphen, but it's too close to
3244                  * either edge to indicate a range, or if we haven't output any
3245                  * characters yet then it's a regular character. */
3246                 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3247                 {
3248
3249                     /* A regular character.  Process like any other, but first
3250                      * clear any flags */
3251                     didrange = FALSE;
3252                     dorange = FALSE;
3253 #ifdef EBCDIC
3254                     non_portable_endpoint = 0;
3255                     backslash_N = 0;
3256 #endif
3257                     /* The tests here for being above Latin1 and similar ones
3258                      * in the following 'else' suffice to find all such
3259                      * occurences in the constant, except those added by a
3260                      * backslash escape sequence, like \x{100}.  Mostly, those
3261                      * set 'has_above_latin1' as appropriate */
3262                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3263                         has_above_latin1 = TRUE;
3264                     }
3265
3266                     /* Drops down to generic code to process current byte */
3267                 }
3268                 else {  /* Is a '-' in the context where it means a range */
3269                     if (didrange) { /* Something like y/A-C-Z// */
3270                         Perl_croak(aTHX_ "Ambiguous range in transliteration"
3271                                          " operator");
3272                     }
3273
3274                     dorange = TRUE;
3275
3276                     s++;    /* Skip past the hyphen */
3277
3278                     /* d now points to where the end-range character will be
3279                      * placed.  Drop down to get that character.  We'll finish
3280                      * processing the range the next time through the loop */
3281
3282                     if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3283                         has_above_latin1 = TRUE;
3284                     }
3285
3286                     /* Drops down to generic code to process current byte */
3287                 }
3288             }  /* End of not a range */
3289             else {
3290                 /* Here we have parsed a range.  Now must handle it.  At this
3291                  * point:
3292                  * 'sv' is a SV* that contains the output string we are
3293                  *      constructing.  The final two characters in that string
3294                  *      are the range start and range end, in order.
3295                  * 'd'  points to just beyond the range end in the 'sv' string,
3296                  *      where we would next place something
3297                  */
3298                 char * max_ptr;
3299                 char * min_ptr;
3300                 IV range_min;
3301                 IV range_max;   /* last character in range */
3302                 STRLEN grow;
3303                 Size_t offset_to_min = 0;
3304                 Size_t extras = 0;
3305 #ifdef EBCDIC
3306                 bool convert_unicode;
3307                 IV real_range_max = 0;
3308 #endif
3309                 /* Get the code point values of the range ends. */
3310                 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3311                 offset_to_max = max_ptr - SvPVX_const(sv);
3312                 if (d_is_utf8) {
3313                     /* We know the utf8 is valid, because we just constructed
3314                      * it ourselves in previous loop iterations */
3315                     min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3316                     range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3317                     range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3318
3319                     /* This compensates for not all code setting
3320                      * 'has_above_latin1', so that we don't skip stuff that
3321                      * should be executed */
3322                     if (range_max > 255) {
3323                         has_above_latin1 = TRUE;
3324                     }
3325                 }
3326                 else {
3327                     min_ptr = max_ptr - 1;
3328                     range_min = * (U8*) min_ptr;
3329                     range_max = * (U8*) max_ptr;
3330                 }
3331
3332                 /* If the range is just a single code point, like tr/a-a/.../,
3333                  * that code point is already in the output, twice.  We can
3334                  * just back up over the second instance and avoid all the rest
3335                  * of the work.  But if it is a variant character, it's been
3336                  * counted twice, so decrement.  (This unlikely scenario is
3337                  * special cased, like the one for a range of 2 code points
3338                  * below, only because the main-line code below needs a range
3339                  * of 3 or more to work without special casing.  Might as well
3340                  * get it out of the way now.) */
3341                 if (UNLIKELY(range_max == range_min)) {
3342                     d = max_ptr;
3343                     if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3344                         utf8_variant_count--;
3345                     }
3346                     goto range_done;
3347                 }
3348
3349 #ifdef EBCDIC
3350                 /* On EBCDIC platforms, we may have to deal with portable
3351                  * ranges.  These happen if at least one range endpoint is a
3352                  * Unicode value (\N{...}), or if the range is a subset of
3353                  * [A-Z] or [a-z], and both ends are literal characters,
3354                  * like 'A', and not like \x{C1} */
3355                 convert_unicode =
3356                                cBOOL(backslash_N)   /* \N{} forces Unicode,
3357                                                        hence portable range */
3358                     || (     ! non_portable_endpoint
3359                         && ((  isLOWER_A(range_min) && isLOWER_A(range_max))
3360                            || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3361                 if (convert_unicode) {
3362
3363                     /* Special handling is needed for these portable ranges.
3364                      * They are defined to be in Unicode terms, which includes
3365                      * all the Unicode code points between the end points.
3366                      * Convert to Unicode to get the Unicode range.  Later we
3367                      * will convert each code point in the range back to
3368                      * native.  */
3369                     range_min = NATIVE_TO_UNI(range_min);
3370                     range_max = NATIVE_TO_UNI(range_max);
3371                 }
3372 #endif
3373
3374                 if (range_min > range_max) {
3375 #ifdef EBCDIC
3376                     if (convert_unicode) {
3377                         /* Need to convert back to native for meaningful
3378                          * messages for this platform */
3379                         range_min = UNI_TO_NATIVE(range_min);
3380                         range_max = UNI_TO_NATIVE(range_max);
3381                     }
3382 #endif
3383                     /* Use the characters themselves for the error message if
3384                      * ASCII printables; otherwise some visible representation
3385                      * of them */
3386                     if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3387                         Perl_croak(aTHX_
3388                          "Invalid range \"%c-%c\" in transliteration operator",
3389                          (char)range_min, (char)range_max);
3390                     }
3391 #ifdef EBCDIC
3392                     else if (convert_unicode) {
3393         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3394                         Perl_croak(aTHX_
3395                            "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3396                            UVXf "}\" in transliteration operator",
3397                            range_min, range_max);
3398                     }
3399 #endif
3400                     else {
3401         /* diag_listed_as: Invalid range "%s" in transliteration operator */
3402                         Perl_croak(aTHX_
3403                            "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3404                            " in transliteration operator",
3405                            range_min, range_max);
3406                     }
3407                 }
3408
3409                 /* If the range is exactly two code points long, they are
3410                  * already both in the output */
3411                 if (UNLIKELY(range_min + 1 == range_max)) {
3412                     goto range_done;
3413                 }
3414
3415                 /* Here the range contains at least 3 code points */
3416
3417                 if (d_is_utf8) {
3418
3419                     /* If everything in the transliteration is below 256, we
3420                      * can avoid special handling later.  A translation table
3421                      * for each of those bytes is created by op.c.  So we
3422                      * expand out all ranges to their constituent code points.
3423                      * But if we've encountered something above 255, the
3424                      * expanding won't help, so skip doing that.  But if it's
3425                      * EBCDIC, we may have to look at each character below 256
3426                      * if we have to convert to/from Unicode values */
3427                     if (   has_above_latin1
3428 #ifdef EBCDIC
3429                         && (range_min > 255 || ! convert_unicode)
3430 #endif
3431                     ) {
3432                         const STRLEN off = d - SvPVX(sv);
3433                         const STRLEN extra = 1 + (send - s) + 1;
3434                         char *e;
3435
3436                         /* Move the high character one byte to the right; then
3437                          * insert between it and the range begin, an illegal
3438                          * byte which serves to indicate this is a range (using
3439                          * a '-' would be ambiguous). */
3440
3441                         if (off + extra > SvLEN(sv)) {
3442                             d = off + SvGROW(sv, off + extra);
3443                             max_ptr = d - off + offset_to_max;
3444                         }
3445
3446                         e = d++;
3447                         while (e-- > max_ptr) {
3448                             *(e + 1) = *e;
3449                         }
3450                         *(e + 1) = (char) RANGE_INDICATOR;
3451                         goto range_done;
3452                     }
3453
3454                     /* Here, we're going to expand out the range.  For EBCDIC
3455                      * the range can extend above 255 (not so in ASCII), so
3456                      * for EBCDIC, split it into the parts above and below
3457                      * 255/256 */
3458 #ifdef EBCDIC
3459                     if (range_max > 255) {
3460                         real_range_max = range_max;
3461                         range_max = 255;
3462                     }
3463 #endif
3464                 }
3465
3466                 /* Here we need to expand out the string to contain each
3467                  * character in the range.  Grow the output to handle this.
3468                  * For non-UTF8, we need a byte for each code point in the
3469                  * range, minus the three that we've already allocated for: the
3470                  * hyphen, the min, and the max.  For UTF-8, we need this
3471                  * plus an extra byte for each code point that occupies two
3472                  * bytes (is variant) when in UTF-8 (except we've already
3473                  * allocated for the end points, including if they are
3474                  * variants).  For ASCII platforms and Unicode ranges on EBCDIC
3475                  * platforms, it's easy to calculate a precise number.  To
3476                  * start, we count the variants in the range, which we need
3477                  * elsewhere in this function anyway.  (For the case where it
3478                  * isn't easy to calculate, 'extras' has been initialized to 0,
3479                  * and the calculation is done in a loop further down.) */
3480 #ifdef EBCDIC
3481                 if (convert_unicode)
3482 #endif
3483                 {
3484                     /* This is executed unconditionally on ASCII, and for
3485                      * Unicode ranges on EBCDIC.  Under these conditions, all
3486                      * code points above a certain value are variant; and none
3487                      * under that value are.  We just need to find out how much
3488                      * of the range is above that value.  We don't count the
3489                      * end points here, as they will already have been counted
3490                      * as they were parsed. */
3491                     if (range_min >= UTF_CONTINUATION_MARK) {
3492
3493                         /* The whole range is made up of variants */
3494                         extras = (range_max - 1) - (range_min + 1) + 1;
3495                     }
3496                     else if (range_max >= UTF_CONTINUATION_MARK) {
3497
3498                         /* Only the higher portion of the range is variants */
3499                         extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3500                     }
3501
3502                     utf8_variant_count += extras;
3503                 }
3504
3505                 /* The base growth is the number of code points in the range,
3506                  * not including the endpoints, which have already been sized
3507                  * for (and output).  We don't subtract for the hyphen, as it
3508                  * has been parsed but not output, and the SvGROW below is
3509                  * based only on what's been output plus what's left to parse.
3510                  * */
3511                 grow = (range_max - 1) - (range_min + 1) + 1;
3512
3513                 if (d_is_utf8) {
3514 #ifdef EBCDIC
3515                     /* In some cases in EBCDIC, we haven't yet calculated a
3516                      * precise amount needed for the UTF-8 variants.  Just
3517                      * assume the worst case, that everything will expand by a
3518                      * byte */
3519                     if (! convert_unicode) {
3520                         grow *= 2;
3521                     }
3522                     else
3523 #endif
3524                     {
3525                         /* Otherwise we know exactly how many variants there
3526                          * are in the range. */
3527                         grow += extras;
3528                     }
3529                 }
3530
3531                 /* Grow, but position the output to overwrite the range min end
3532                  * point, because in some cases we overwrite that */
3533                 SvCUR_set(sv, d - SvPVX_const(sv));
3534                 offset_to_min = min_ptr - SvPVX_const(sv);
3535
3536                 /* See Note on sizing above. */
3537                 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3538                                              + (send - s)
3539                                              + grow
3540                                              + 1 /* Trailing NUL */ );
3541
3542                 /* Now, we can expand out the range. */
3543 #ifdef EBCDIC
3544                 if (convert_unicode) {
3545                     SSize_t i;
3546
3547                     /* Recall that the min and max are now in Unicode terms, so
3548                      * we have to convert each character to its native
3549                      * equivalent */
3550                     if (d_is_utf8) {
3551                         for (i = range_min; i <= range_max; i++) {
3552                             append_utf8_from_native_byte(
3553                                                     LATIN1_TO_NATIVE((U8) i),
3554                                                     (U8 **) &d);
3555                         }
3556                     }
3557                     else {
3558                         for (i = range_min; i <= range_max; i++) {
3559                             *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3560                         }
3561                     }
3562                 }
3563                 else
3564 #endif
3565                 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3566                 {
3567                     /* Here, no conversions are necessary, which means that the
3568                      * first character in the range is already in 'd' and
3569                      * valid, so we can skip overwriting it */
3570                     if (d_is_utf8) {
3571                         SSize_t i;
3572                         d += UTF8SKIP(d);
3573                         for (i = range_min + 1; i <= range_max; i++) {
3574                             append_utf8_from_native_byte((U8) i, (U8 **) &d);
3575                         }
3576                     }
3577                     else {
3578                         SSize_t i;
3579                         d++;
3580                         assert(range_min + 1 <= range_max);
3581                         for (i = range_min + 1; i < range_max; i++) {
3582 #ifdef EBCDIC
3583                             /* In this case on EBCDIC, we haven't calculated
3584                              * the variants.  Do it here, as we go along */
3585                             if (! UVCHR_IS_INVARIANT(i)) {
3586                                 utf8_variant_count++;
3587                             }
3588 #endif
3589                             *d++ = (char)i;
3590                         }
3591
3592                         /* The range_max is done outside the loop so as to
3593                          * avoid having to special case not incrementing
3594                          * 'utf8_variant_count' on EBCDIC (it's already been
3595                          * counted when originally parsed) */
3596                         *d++ = (char) range_max;
3597                     }
3598                 }
3599
3600 #ifdef EBCDIC
3601                 /* If the original range extended above 255, add in that
3602                  * portion. */
3603                 if (real_range_max) {
3604                     *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3605                     *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3606                     if (real_range_max > 0x100) {
3607                         if (real_range_max > 0x101) {
3608                             *d++ = (char) RANGE_INDICATOR;
3609                         }
3610                         d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3611                     }
3612                 }
3613 #endif
3614
3615               range_done:
3616                 /* mark the range as done, and continue */
3617                 didrange = TRUE;
3618                 dorange = FALSE;
3619 #ifdef EBCDIC
3620                 non_portable_endpoint = 0;
3621                 backslash_N = 0;
3622 #endif
3623                 continue;
3624             } /* End of is a range */
3625         } /* End of transliteration.  Joins main code after these else's */
3626         else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3627             char *s1 = s-1;
3628             int esc = 0;
3629             while (s1 >= start && *s1-- == '\\')
3630                 esc = !esc;
3631             if (!esc)
3632                 in_charclass = TRUE;
3633         }
3634         else if (*s == ']' && PL_lex_inpat && in_charclass) {
3635             char *s1 = s-1;
3636             int esc = 0;
3637             while (s1 >= start && *s1-- == '\\')
3638                 esc = !esc;
3639             if (!esc)
3640                 in_charclass = FALSE;
3641         }
3642             /* skip for regexp comments /(?#comment)/, except for the last
3643              * char, which will be done separately.  Stop on (?{..}) and
3644              * friends (??{ ... }) or (*{ ... }) */
3645         else if (*s == '(' && PL_lex_inpat && (s[1] == '?' || s[1] == '*') && !in_charclass) {
3646             if (s[1] == '?' && s[2] == '#') {
3647                 if (s_is_utf8) {
3648                     PERL_UINT_FAST8_T  len = UTF8SKIP(s);
3649
3650                     while (s + len < send && *s != ')') {
3651                         Copy(s, d, len, U8);
3652                         d += len;
3653                         s += len;
3654                         len = UTF8_SAFE_SKIP(s, send);
3655                     }
3656                 }
3657                 else while (s+1 < send && *s != ')') {
3658                     *d++ = *s++;
3659                 }
3660             }
3661             else
3662             if (!PL_lex_casemods &&
3663                 /* The following should match regcomp.c */
3664                 ((s[1] == '?' && (s[2] == '{'                        /* (?{ ... })  */
3665                               || (s[2] == '?' && s[3] == '{'))) ||   /* (??{ ... }) */
3666                  (s[1] == '*' && (s[2] == '{' )))                    /* (*{ ... })  */
3667             ){
3668                 break;
3669             }
3670         }
3671             /* likewise skip #-initiated comments in //x patterns */
3672         else if (*s == '#'
3673                  && PL_lex_inpat
3674                  && !in_charclass
3675                  && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3676         {
3677             while (s < send && *s != '\n')
3678                 *d++ = *s++;
3679         }
3680             /* no further processing of single-quoted regex */
3681         else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3682             goto default_action;
3683
3684             /* check for embedded arrays
3685              * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3686              */
3687         else if (*s == '@' && s[1]) {
3688             if (UTF
3689                ? isIDFIRST_utf8_safe(s+1, send)
3690                : isWORDCHAR_A(s[1]))
3691             {
3692                 break;
3693             }
3694             if (memCHRs(":'{$", s[1]))
3695                 break;
3696             if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3697                 break; /* in regexp, neither @+ nor @- are interpolated */
3698         }
3699             /* check for embedded scalars.  only stop if we're sure it's a
3700              * variable.  */
3701         else if (*s == '$') {
3702             if (!PL_lex_inpat)  /* not a regexp, so $ must be var */
3703                 break;
3704             if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3705                 if (s[1] == '\\') {
3706                     Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3707                                    "Possible unintended interpolation of $\\ in regex");
3708                 }
3709                 break;          /* in regexp, $ might be tail anchor */
3710             }
3711         }
3712
3713         /* End of else if chain - OP_TRANS rejoin rest */
3714
3715         if (UNLIKELY(s >= send)) {
3716             assert(s == send);
3717             break;
3718         }
3719
3720         /* backslashes */
3721         if (*s == '\\' && s+1 < send) {
3722             char* bslash = s;   /* point to beginning \ */
3723             char* rbrace;       /* point to ending '}' */
3724             char* e;            /* 1 past the meat (non-blanks) before the
3725                                    brace */
3726             s++;
3727
3728             /* warn on \1 - \9 in substitution replacements, but note that \11
3729              * is an octal; and \19 is \1 followed by '9' */
3730             if (PL_lex_inwhat == OP_SUBST
3731                 && !PL_lex_inpat
3732                 && isDIGIT(*s)
3733                 && *s != '0'
3734                 && !isDIGIT(s[1]))
3735             {
3736                 /* diag_listed_as: \%d better written as $%d */
3737                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3738                 s = bslash;
3739                 *s = '$';
3740                 break;
3741             }
3742
3743             /* string-change backslash escapes */
3744             if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3745                 s = bslash;
3746                 break;
3747             }
3748             /* In a pattern, process \N, but skip any other backslash escapes.
3749              * This is because we don't want to translate an escape sequence
3750              * into a meta symbol and have the regex compiler use the meta
3751              * symbol meaning, e.g. \x{2E} would be confused with a dot.  But
3752              * in spite of this, we do have to process \N here while the proper
3753              * charnames handler is in scope.  See bugs #56444 and #62056.
3754              *
3755              * There is a complication because \N in a pattern may also stand
3756              * for 'match a non-nl', and not mean a charname, in which case its
3757              * processing should be deferred to the regex compiler.  To be a
3758              * charname it must be followed immediately by a '{', and not look
3759              * like \N followed by a curly quantifier, i.e., not something like
3760              * \N{3,}.  regcurly returns a boolean indicating if it is a legal
3761              * quantifier */
3762             else if (PL_lex_inpat
3763                     && (*s != 'N'
3764                         || s[1] != '{'
3765                         || regcurly(s + 1, send, NULL)))
3766             {
3767                 *d++ = '\\';
3768                 goto default_action;
3769             }
3770
3771             switch (*s) {
3772             default:
3773                 {
3774                     if ((isALPHANUMERIC(*s)))
3775                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3776                                        "Unrecognized escape \\%c passed through",
3777                                        *s);
3778                     /* default action is to copy the quoted character */
3779                     goto default_action;
3780                 }
3781
3782             /* eg. \132 indicates the octal constant 0132 */
3783             case '0': case '1': case '2': case '3':
3784             case '4': case '5': case '6': case '7':
3785                 {
3786                     I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3787                               | PERL_SCAN_NOTIFY_ILLDIGIT;
3788                     STRLEN len = 3;
3789                     uv = grok_oct(s, &len, &flags, NULL);
3790                     s += len;
3791                     if (  (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3792                         && s < send
3793                         && isDIGIT(*s)  /* like \08, \178 */
3794                         && ckWARN(WARN_MISC))
3795                     {
3796                         Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3797                             form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3798                     }
3799                 }
3800                 goto NUM_ESCAPE_INSERT;
3801
3802             /* eg. \o{24} indicates the octal constant \024 */
3803             case 'o':
3804                 {
3805                     const char* error;
3806
3807                     if (! grok_bslash_o(&s, send,
3808                                                &uv, &error,
3809                                                NULL,
3810                                                FALSE, /* Not strict */
3811                                                FALSE, /* No illegal cp's */
3812                                                UTF))
3813                     {
3814                         yyerror(error);
3815                         uv = 0; /* drop through to ensure range ends are set */
3816                     }
3817                     goto NUM_ESCAPE_INSERT;
3818                 }
3819
3820             /* eg. \x24 indicates the hex constant 0x24 */
3821             case 'x':
3822                 {
3823                     const char* error;
3824
3825                     if (! grok_bslash_x(&s, send,
3826                                                &uv, &error,
3827                                                NULL,
3828                                                FALSE, /* Not strict */
3829                                                FALSE, /* No illegal cp's */
3830                                                UTF))
3831                     {
3832                         yyerror(error);
3833                         uv = 0; /* drop through to ensure range ends are set */
3834                     }
3835                 }
3836
3837               NUM_ESCAPE_INSERT:
3838                 /* Insert oct or hex escaped character. */
3839
3840                 /* Here uv is the ordinal of the next character being added */
3841                 if (UVCHR_IS_INVARIANT(uv)) {
3842                     *d++ = (char) uv;
3843                 }
3844                 else {
3845                     if (!d_is_utf8 && uv > 255) {
3846
3847                         /* Here, 'uv' won't fit unless we convert to UTF-8.
3848                          * If we've only seen invariants so far, all we have to
3849                          * do is turn on the flag */
3850                         if (utf8_variant_count == 0) {
3851                             SvUTF8_on(sv);
3852                         }
3853                         else {
3854                             SvCUR_set(sv, d - SvPVX_const(sv));
3855                             SvPOK_on(sv);
3856                             *d = '\0';
3857
3858                             sv_utf8_upgrade_flags_grow(
3859                                            sv,
3860                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3861
3862                                            /* Since we're having to grow here,
3863                                             * make sure we have enough room for
3864                                             * this escape and a NUL, so the
3865                                             * code immediately below won't have
3866                                             * to actually grow again */
3867                                           UVCHR_SKIP(uv)
3868                                         + (STRLEN)(send - s) + 1);
3869                             d = SvPVX(sv) + SvCUR(sv);
3870                         }
3871
3872                         has_above_latin1 = TRUE;
3873                         d_is_utf8 = TRUE;
3874                     }
3875
3876                     if (! d_is_utf8) {
3877                         *d++ = (char)uv;
3878                         utf8_variant_count++;
3879                     }
3880                     else {
3881                        /* Usually, there will already be enough room in 'sv'
3882                         * since such escapes are likely longer than any UTF-8
3883                         * sequence they can end up as.  This isn't the case on
3884                         * EBCDIC where \x{40000000} contains 12 bytes, and the
3885                         * UTF-8 for it contains 14.  And, we have to allow for
3886                         * a trailing NUL.  It probably can't happen on ASCII
3887                         * platforms, but be safe.  See Note on sizing above. */
3888                         const STRLEN needed = d - SvPVX(sv)
3889                                             + UVCHR_SKIP(uv)
3890                                             + (send - s)
3891                                             + 1;
3892                         if (UNLIKELY(needed > SvLEN(sv))) {
3893                             SvCUR_set(sv, d - SvPVX_const(sv));
3894                             d = SvCUR(sv) + SvGROW(sv, needed);
3895                         }
3896
3897                         d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3898                                                    (ckWARN(WARN_PORTABLE))
3899                                                    ? UNICODE_WARN_PERL_EXTENDED
3900                                                    : 0);
3901                     }
3902                 }
3903 #ifdef EBCDIC
3904                 non_portable_endpoint++;
3905 #endif
3906                 continue;
3907
3908             case 'N':
3909                 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3910                  * named character, like \N{LATIN SMALL LETTER A}, or a named
3911                  * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3912                  * GRAVE} (except y/// can't handle the latter, croaking).  For
3913                  * convenience all three forms are referred to as "named
3914                  * characters" below.
3915                  *
3916                  * For patterns, \N also can mean to match a non-newline.  Code
3917                  * before this 'switch' statement should already have handled
3918                  * this situation, and hence this code only has to deal with
3919                  * the named character cases.
3920                  *
3921                  * For non-patterns, the named characters are converted to
3922                  * their string equivalents.  In patterns, named characters are
3923                  * not converted to their ultimate forms for the same reasons
3924                  * that other escapes aren't (mainly that the ultimate
3925                  * character could be considered a meta-symbol by the regex
3926                  * compiler).  Instead, they are converted to the \N{U+...}
3927                  * form to get the value from the charnames that is in effect
3928                  * right now, while preserving the fact that it was a named
3929                  * character, so that the regex compiler knows this.
3930                  *
3931                  * The structure of this section of code (besides checking for
3932                  * errors and upgrading to utf8) is:
3933                  *    If the named character is of the form \N{U+...}, pass it
3934                  *      through if a pattern; otherwise convert the code point
3935                  *      to utf8
3936                  *    Otherwise must be some \N{NAME}: convert to
3937                  *      \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3938                  *
3939                  * Transliteration is an exception.  The conversion to utf8 is
3940                  * only done if the code point requires it to be representable.
3941                  *
3942                  * Here, 's' points to the 'N'; the test below is guaranteed to
3943                  * succeed if we are being called on a pattern, as we already
3944                  * know from a test above that the next character is a '{'.  A
3945                  * non-pattern \N must mean 'named character', which requires
3946                  * braces */
3947                 s++;
3948                 if (*s != '{') {
3949                     yyerror("Missing braces on \\N{}");
3950                     *d++ = '\0';
3951                     continue;
3952                 }
3953                 s++;
3954
3955                 /* If there is no matching '}', it is an error. */
3956                 if (! (rbrace = (char *) memchr(s, '}', send - s))) {
3957                     if (! PL_lex_inpat) {
3958                         yyerror("Missing right brace on \\N{}");
3959                     } else {
3960                         yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3961                     }
3962                     yyquit(); /* Have exhausted the input. */
3963                 }
3964
3965                 /* Here it looks like a named character */
3966                 while (s < rbrace && isBLANK(*s)) {
3967                     s++;
3968                 }
3969
3970                 e = rbrace;
3971                 while (s < e && isBLANK(*(e - 1))) {
3972                     e--;
3973                 }
3974
3975                 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3976                     s += 2;         /* Skip to next char after the 'U+' */
3977                     if (PL_lex_inpat) {
3978
3979                         /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3980                         /* Check the syntax.  */
3981                         if (!isXDIGIT(*s)) {
3982                           bad_NU:
3983                             yyerror(
3984                                 "Invalid hexadecimal number in \\N{U+...}"
3985                             );
3986                             s = rbrace + 1;
3987                             *d++ = '\0';
3988                             continue;
3989                         }
3990                         while (++s < e) {
3991                             if (isXDIGIT(*s))
3992                                 continue;
3993                             else if ((*s == '.' || *s == '_')
3994                                   && isXDIGIT(s[1]))
3995                                 continue;
3996                             goto bad_NU;
3997                         }
3998
3999                         /* Pass everything through unchanged.
4000                          * +1 is to include the '}' */
4001                         Copy(bslash, d, rbrace - bslash + 1, char);
4002                         d += rbrace - bslash + 1;
4003                     }
4004                     else {  /* Not a pattern: convert the hex to string */
4005                         I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
4006                                   | PERL_SCAN_SILENT_ILLDIGIT
4007                                   | PERL_SCAN_SILENT_OVERFLOW
4008                                   | PERL_SCAN_DISALLOW_PREFIX;
4009                         STRLEN len = e - s;
4010
4011                         uv = grok_hex(s, &len, &flags, NULL);
4012                         if (len == 0 || (len != (STRLEN)(e - s)))
4013                             goto bad_NU;
4014
4015                         if (    uv > MAX_LEGAL_CP
4016                             || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
4017                         {
4018                             yyerror(form_cp_too_large_msg(16, s, len, 0));
4019                             uv = 0; /* drop through to ensure range ends are
4020                                        set */
4021                         }
4022
4023                          /* For non-tr///, if the destination is not in utf8,
4024                           * unconditionally recode it to be so.  This is
4025                           * because \N{} implies Unicode semantics, and scalars
4026                           * have to be in utf8 to guarantee those semantics.
4027                           * tr/// doesn't care about Unicode rules, so no need
4028                           * there to upgrade to UTF-8 for small enough code
4029                           * points */
4030                         if (! d_is_utf8 && (   uv > 0xFF
4031                                            || PL_lex_inwhat != OP_TRANS))
4032                         {
4033                             /* See Note on sizing above.  */
4034                             const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
4035
4036                             SvCUR_set(sv, d - SvPVX_const(sv));
4037                             SvPOK_on(sv);
4038                             *d = '\0';
4039
4040                             if (utf8_variant_count == 0) {
4041                                 SvUTF8_on(sv);
4042                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4043                             }
4044                             else {
4045                                 sv_utf8_upgrade_flags_grow(
4046                                                sv,
4047                                                SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4048                                                extra);
4049                                 d = SvPVX(sv) + SvCUR(sv);
4050                             }
4051
4052                             d_is_utf8 = TRUE;
4053                             has_above_latin1 = TRUE;
4054                         }
4055
4056                         /* Add the (Unicode) code point to the output. */
4057                         if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
4058                             *d++ = (char) LATIN1_TO_NATIVE(uv);
4059                         }
4060                         else {
4061                             d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
4062                                                    (ckWARN(WARN_PORTABLE))
4063                                                    ? UNICODE_WARN_PERL_EXTENDED
4064                                                    : 0);
4065                         }
4066                     }
4067                 }
4068                 else     /* Here is \N{NAME} but not \N{U+...}. */
4069                      if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
4070                 {   /* Failed.  We should die eventually, but for now use a NUL
4071                        to keep parsing */
4072                     *d++ = '\0';
4073                 }
4074                 else {  /* Successfully evaluated the name */
4075                     STRLEN len;
4076                     const char *str = SvPV_const(res, len);
4077                     if (PL_lex_inpat) {
4078
4079                         if (! len) { /* The name resolved to an empty string */
4080                             const char empty_N[] = "\\N{_}";
4081                             Copy(empty_N, d, sizeof(empty_N) - 1, char);
4082                             d += sizeof(empty_N) - 1;
4083                         }
4084                         else {
4085                             /* In order to not lose information for the regex
4086                             * compiler, pass the result in the specially made
4087                             * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
4088                             * the code points in hex of each character
4089                             * returned by charnames */
4090
4091                             const char *str_end = str + len;
4092                             const STRLEN off = d - SvPVX_const(sv);
4093
4094                             if (! SvUTF8(res)) {
4095                                 /* For the non-UTF-8 case, we can determine the
4096                                  * exact length needed without having to parse
4097                                  * through the string.  Each character takes up
4098                                  * 2 hex digits plus either a trailing dot or
4099                                  * the "}" */
4100                                 const char initial_text[] = "\\N{U+";
4101                                 const STRLEN initial_len = sizeof(initial_text)
4102                                                            - 1;
4103                                 d = off + SvGROW(sv, off
4104                                                     + 3 * len
4105
4106                                                     /* +1 for trailing NUL */
4107                                                     + initial_len + 1
4108
4109                                                     + (STRLEN)(send - rbrace));
4110                                 Copy(initial_text, d, initial_len, char);
4111                                 d += initial_len;
4112                                 while (str < str_end) {
4113                                     char hex_string[4];
4114                                     int len =
4115                                         my_snprintf(hex_string,
4116                                                   sizeof(hex_string),
4117                                                   "%02X.",
4118
4119                                                   /* The regex compiler is
4120                                                    * expecting Unicode, not
4121                                                    * native */
4122                                                   NATIVE_TO_LATIN1(*str));
4123                                     PERL_MY_SNPRINTF_POST_GUARD(len,
4124                                                            sizeof(hex_string));
4125                                     Copy(hex_string, d, 3, char);
4126                                     d += 3;
4127                                     str++;
4128                                 }
4129                                 d--;    /* Below, we will overwrite the final
4130                                            dot with a right brace */
4131                             }
4132                             else {
4133                                 STRLEN char_length; /* cur char's byte length */
4134
4135                                 /* and the number of bytes after this is
4136                                  * translated into hex digits */
4137                                 STRLEN output_length;
4138
4139                                 /* 2 hex per byte; 2 chars for '\N'; 2 chars
4140                                  * for max('U+', '.'); and 1 for NUL */
4141                                 char hex_string[2 * UTF8_MAXBYTES + 5];
4142
4143                                 /* Get the first character of the result. */
4144                                 U32 uv = utf8n_to_uvchr((U8 *) str,
4145                                                         len,
4146                                                         &char_length,
4147                                                         UTF8_ALLOW_ANYUV);
4148                                 /* Convert first code point to Unicode hex,
4149                                  * including the boiler plate before it. */
4150                                 output_length =
4151                                     my_snprintf(hex_string, sizeof(hex_string),
4152                                              "\\N{U+%X",
4153                                              (unsigned int) NATIVE_TO_UNI(uv));
4154
4155                                 /* Make sure there is enough space to hold it */
4156                                 d = off + SvGROW(sv, off
4157                                                     + output_length
4158                                                     + (STRLEN)(send - rbrace)
4159                                                     + 2);       /* '}' + NUL */
4160                                 /* And output it */
4161                                 Copy(hex_string, d, output_length, char);
4162                                 d += output_length;
4163
4164                                 /* For each subsequent character, append dot and
4165                                 * its Unicode code point in hex */
4166                                 while ((str += char_length) < str_end) {
4167                                     const STRLEN off = d - SvPVX_const(sv);
4168                                     U32 uv = utf8n_to_uvchr((U8 *) str,
4169                                                             str_end - str,
4170                                                             &char_length,
4171                                                             UTF8_ALLOW_ANYUV);
4172                                     output_length =
4173                                         my_snprintf(hex_string,
4174                                              sizeof(hex_string),
4175                                              ".%X",
4176                                              (unsigned int) NATIVE_TO_UNI(uv));
4177
4178                                     d = off + SvGROW(sv, off
4179                                                         + output_length
4180                                                         + (STRLEN)(send - rbrace)
4181                                                         + 2);   /* '}' +  NUL */
4182                                     Copy(hex_string, d, output_length, char);
4183                                     d += output_length;
4184                                 }
4185                             }
4186
4187                             *d++ = '}'; /* Done.  Add the trailing brace */
4188                         }
4189                     }
4190                     else { /* Here, not in a pattern.  Convert the name to a
4191                             * string. */
4192
4193                         if (PL_lex_inwhat == OP_TRANS) {
4194                             str = SvPV_const(res, len);
4195                             if (len > ((SvUTF8(res))
4196                                        ? UTF8SKIP(str)
4197                                        : 1U))
4198                             {
4199                                 yyerror(Perl_form(aTHX_
4200                                     "%.*s must not be a named sequence"
4201                                     " in transliteration operator",
4202                                         /*  +1 to include the "}" */
4203                                     (int) (rbrace + 1 - start), start));
4204                                 *d++ = '\0';
4205                                 goto end_backslash_N;
4206                             }
4207
4208                             if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4209                                 has_above_latin1 = TRUE;
4210                             }
4211
4212                         }
4213                         else if (! SvUTF8(res)) {
4214                             /* Make sure \N{} return is UTF-8.  This is because
4215                              * \N{} implies Unicode semantics, and scalars have
4216                              * to be in utf8 to guarantee those semantics; but
4217                              * not needed in tr/// */
4218                             sv_utf8_upgrade_flags(res, 0);
4219                             str = SvPV_const(res, len);
4220                         }
4221
4222                          /* Upgrade destination to be utf8 if this new
4223                           * component is */
4224                         if (! d_is_utf8 && SvUTF8(res)) {
4225                             /* See Note on sizing above.  */
4226                             const STRLEN extra = len + (send - s) + 1;
4227
4228                             SvCUR_set(sv, d - SvPVX_const(sv));
4229                             SvPOK_on(sv);
4230                             *d = '\0';
4231
4232                             if (utf8_variant_count == 0) {
4233                                 SvUTF8_on(sv);
4234                                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4235                             }
4236                             else {
4237                                 sv_utf8_upgrade_flags_grow(sv,
4238                                                 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4239                                                 extra);
4240                                 d = SvPVX(sv) + SvCUR(sv);
4241                             }
4242                             d_is_utf8 = TRUE;
4243                         } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
4244
4245                             /* See Note on sizing above.  (NOTE: SvCUR() is not
4246                              * set correctly here). */
4247                             const STRLEN extra = len + (send - rbrace) + 1;
4248                             const STRLEN off = d - SvPVX_const(sv);
4249                             d = off + SvGROW(sv, off + extra);
4250                         }
4251                         Copy(str, d, len, char);
4252                         d += len;
4253                     }
4254
4255                     SvREFCNT_dec(res);
4256
4257                 } /* End \N{NAME} */
4258
4259               end_backslash_N:
4260 #ifdef EBCDIC
4261                 backslash_N++; /* \N{} is defined to be Unicode */
4262 #endif
4263                 s = rbrace + 1;  /* Point to just after the '}' */
4264                 continue;
4265
4266             /* \c is a control character */
4267             case 'c':
4268                 s++;
4269                 if (s < send) {
4270                     const char * message;
4271
4272                     if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4273                         yyerror(message);
4274                         yyquit();   /* Have always immediately croaked on
4275                                        errors in this */
4276                     }
4277                     d++;
4278                 }
4279                 else {
4280                     yyerror("Missing control char name in \\c");
4281                     yyquit();   /* Are at end of input, no sense continuing */
4282                 }
4283 #ifdef EBCDIC
4284                 non_portable_endpoint++;
4285 #endif
4286                 break;
4287
4288             /* printf-style backslashes, formfeeds, newlines, etc */
4289             case 'b':
4290                 *d++ = '\b';
4291                 break;
4292             case 'n':
4293                 *d++ = '\n';
4294                 break;
4295             case 'r':
4296                 *d++ = '\r';
4297                 break;
4298             case 'f':
4299                 *d++ = '\f';
4300                 break;
4301             case 't':
4302                 *d++ = '\t';
4303                 break;
4304             case 'e':
4305                 *d++ = ESC_NATIVE;
4306                 break;
4307             case 'a':
4308                 *d++ = '\a';
4309                 break;
4310             } /* end switch */
4311
4312             s++;
4313             continue;
4314         } /* end if (backslash) */
4315
4316     default_action:
4317         /* Just copy the input to the output, though we may have to convert
4318          * to/from UTF-8.
4319          *
4320          * If the input has the same representation in UTF-8 as not, it will be
4321          * a single byte, and we don't care about UTF8ness; just copy the byte */
4322         if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4323             *d++ = *s++;
4324         }
4325         else if (! s_is_utf8 && ! d_is_utf8) {
4326             /* If neither source nor output is UTF-8, is also a single byte,
4327              * just copy it; but this byte counts should we later have to
4328              * convert to UTF-8 */
4329             *d++ = *s++;
4330             utf8_variant_count++;
4331         }
4332         else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
4333             const STRLEN len = UTF8SKIP(s);
4334
4335             /* We expect the source to have already been checked for
4336              * malformedness */
4337             assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4338
4339             Copy(s, d, len, U8);
4340             d += len;
4341             s += len;
4342         }
4343         else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4344             STRLEN need = send - s + 1; /* See Note on sizing above. */
4345
4346             SvCUR_set(sv, d - SvPVX_const(sv));
4347             SvPOK_on(sv);
4348             *d = '\0';
4349
4350             if (utf8_variant_count == 0) {
4351                 SvUTF8_on(sv);
4352                 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4353             }
4354             else {
4355                 sv_utf8_upgrade_flags_grow(sv,
4356                                            SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4357                                            need);
4358                 d = SvPVX(sv) + SvCUR(sv);
4359             }
4360             d_is_utf8 = TRUE;
4361             goto default_action; /* Redo, having upgraded so both are UTF-8 */
4362         }
4363         else {  /* UTF8ness matters: convert this non-UTF8 source char to
4364                    UTF-8 for output.  It will occupy 2 bytes, but don't include
4365                    the input byte since we haven't incremented 's' yet. See
4366                    Note on sizing above. */
4367             const STRLEN off = d - SvPVX(sv);
4368             const STRLEN extra = 2 + (send - s - 1) + 1;
4369             if (off + extra > SvLEN(sv)) {
4370                 d = off + SvGROW(sv, off + extra);
4371             }
4372             *d++ = UTF8_EIGHT_BIT_HI(*s);
4373             *d++ = UTF8_EIGHT_BIT_LO(*s);
4374             s++;
4375         }
4376     } /* while loop to process each character */
4377
4378     {
4379         const STRLEN off = d - SvPVX(sv);
4380
4381         /* See if room for the terminating NUL */
4382         if (UNLIKELY(off >= SvLEN(sv))) {
4383
4384 #ifndef DEBUGGING
4385
4386             if (off > SvLEN(sv))
4387 #endif
4388                 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4389                         " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4390
4391             /* Whew!  Here we don't have room for the terminating NUL, but
4392              * everything else so far has fit.  It's not too late to grow
4393              * to fit the NUL and continue on.  But it is a bug, as the code
4394              * above was supposed to have made room for this, so under
4395              * DEBUGGING builds, we panic anyway.  */
4396             d = off + SvGROW(sv, off + 1);
4397         }
4398     }
4399
4400     /* terminate the string and set up the sv */
4401     *d = '\0';
4402     SvCUR_set(sv, d - SvPVX_const(sv));
4403
4404     SvPOK_on(sv);
4405     if (d_is_utf8) {
4406         SvUTF8_on(sv);
4407     }
4408
4409     /* shrink the sv if we allocated more than we used */
4410     if (SvCUR(sv) + 5 < SvLEN(sv)) {
4411         SvPV_shrink_to_cur(sv);
4412     }
4413
4414     /* return the substring (via pl_yylval) only if we parsed anything */
4415     if (s > start) {
4416         char *s2 = start;
4417         for (; s2 < s; s2++) {
4418             if (*s2 == '\n')
4419                 COPLINE_INC_WITH_HERELINES;
4420         }
4421         SvREFCNT_inc_simple_void_NN(sv);
4422         if (   (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4423             && ! PL_parser->lex_re_reparsing)
4424         {
4425             const char *const key = PL_lex_inpat ? "qr" : "q";
4426             const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4427             const char *type;
4428             STRLEN typelen;
4429
4430             if (PL_lex_inwhat == OP_TRANS) {
4431                 type = "tr";
4432                 typelen = 2;
4433             } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4434                 type = "s";
4435                 typelen = 1;
4436             } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4437                 type = "q";
4438                 typelen = 1;
4439             } else {
4440                 type = "qq";
4441                 typelen = 2;
4442             }
4443
4444             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4445                                 type, typelen, NULL);
4446         }
4447         pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4448     }
4449     LEAVE_with_name("scan_const");
4450     return s;
4451 }
4452
4453 /* S_intuit_more
4454  * Returns TRUE if there's more to the expression (e.g., a subscript),
4455  * FALSE otherwise.
4456  *
4457  * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4458  *
4459  * ->[ and ->{ return TRUE
4460  * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4461  * { and [ outside a pattern are always subscripts, so return TRUE
4462  * if we're outside a pattern and it's not { or [, then return FALSE
4463  * if we're in a pattern and the first char is a {
4464  *   {4,5} (any digits around the comma) returns FALSE
4465  * if we're in a pattern and the first char is a [
4466  *   [] returns FALSE
4467  *   [SOMETHING] has a funky heuristic to decide whether it's a
4468  *      character class or not.  It has to deal with things like
4469  *      /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4470  * anything else returns TRUE
4471  */
4472
4473 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4474
4475 STATIC int
4476 S_intuit_more(pTHX_ char *s, char *e)
4477 {
4478     PERL_ARGS_ASSERT_INTUIT_MORE;
4479
4480     /* This function has been mostly untouched for a long time, due to its,
4481      * 'scariness', and lack of comments.  khw has gone through and done some
4482      * cleanup, while finding various instances of problematic behavior.
4483      * Rather than change this base-level function immediately, khw has added
4484      * commentary to those areas. */
4485
4486     /* If recursed within brackets, there is more to the expression */
4487     if (PL_lex_brackets)
4488         return TRUE;
4489
4490     /* If begins with '->' ... */
4491     if (s[0] == '-' && s[1] == '>') {
4492
4493         /* '->[' and '->{' imply more to the expression */
4494         if (s[2] == '[' || s[2] == '{') {
4495             return TRUE;
4496         }
4497
4498         /* Any post deref construct implies more to the expression */
4499         if (   FEATURE_POSTDEREF_QQ_IS_ENABLED
4500             && (   (s[2] == '$' && (    s[3] == '*'
4501                                     || (s[3] == '#' && s[4] == '*')))
4502                 || (s[2] == '@' && memCHRs("*[{", s[3])) ))
4503         {
4504             return TRUE;
4505         }
4506     }
4507
4508     if (s[0] != '{' && s[0] != '[')
4509         return FALSE;
4510
4511     /* quit immediately from any errors from now on */
4512     PL_parser->sub_no_recover = TRUE;
4513
4514     /* Here is '{' or '['.  Outside patterns, they're always subscripts */
4515     if (!PL_lex_inpat)
4516         return TRUE;
4517
4518     /* In a pattern, so maybe we have {n,m}, in which case, there isn't more to
4519      * the expression.
4520      *
4521      * khw: This assumes that anything matching regcurly is a character class.
4522      * The syntax of regcurly has been loosened since this function was
4523      * written, and regcurly never required a comma, as in {0}.  Probably it is
4524      * ok as-is */
4525     if (s[0] == '{') {
4526         if (regcurly(s, e, NULL)) {
4527             return FALSE;
4528         }
4529         return TRUE;
4530     }
4531
4532     /* Here is '[': maybe we have a character class.  Examine the guts */
4533     s++;
4534
4535     /* '^' implies a character class; An empty '[]' isn't legal, but it does
4536      * mean there isn't more to come */
4537     if (s[0] == ']' || s[0] == '^')
4538         return FALSE;
4539
4540     /* Find matching ']'.  khw: This means any s[1] below is guaranteed to
4541      * exist */
4542     const char * const send = (char *) memchr(s, ']', e - s);
4543     if (! send)         /* has to be an expression */
4544         return TRUE;
4545
4546     /* If the construct consists entirely of one or two digits, call it a
4547      * subscript. */
4548     if (isDIGIT(s[0]) && send - s <= 2 && (send - s == 1 || (isDIGIT(s[1])))) {
4549         return TRUE;
4550     }
4551
4552     /* this is terrifying, and it mostly works.  See GH #16478.
4553      *
4554      * khw: That ticket shows that the heuristics here get things wrong.  That
4555      * most of the weights are divisible by 5 indicates that not a lot of
4556      * tuning was done, and that the values are fairly arbitrary.  Especially
4557      * problematic are when all characters in the construct are numeric.  We
4558      * have [89] always resolving to a subscript, though that could well be a
4559      * character class that is related to finding non-octals.  And [100] is a
4560      * character class when it could well be a subscript. */
4561
4562     int weight;
4563
4564     if (s[0] == '$') {  /* First char is dollar; lean very slightly to it
4565                            being a subscript */
4566         weight = -1;
4567     }
4568     else {              /* Otherwise, lean a little more towards it being a
4569                            character class. */
4570         weight = 2;
4571     }
4572
4573     /* Unsigned version of current character */
4574     unsigned char un_char = 0;
4575
4576     /* Keep track of how many multiple occurrences of the same character there
4577      * are */
4578     char seen[256];
4579     Zero(seen, 256, char);
4580
4581     /* Examine each character in the construct */
4582     bool first_time = true;
4583     for (; s < send; s++, first_time = false) {
4584         unsigned char prev_un_char = un_char;
4585         un_char = (unsigned char) s[0];
4586         switch (s[0]) {
4587           case '@':
4588           case '&':
4589           case '$':
4590
4591             /* Each additional occurrence of one of these three strongly
4592              * indicates it is a subscript */
4593             weight -= seen[un_char] * 10;
4594
4595             /* Following one of these characters, we look to see if there is an
4596              * identifier already found in the program by that name.  If so,
4597              * strongly suspect this isn't a character class */
4598             if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4599                 int len;
4600                 char tmpbuf[sizeof PL_tokenbuf * 4];
4601                 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4602                 len = (int)strlen(tmpbuf);
4603                 if (   len > 1
4604                     && gv_fetchpvn_flags(tmpbuf,
4605                                          len,
4606                                          UTF ? SVf_UTF8 : 0,
4607                                          SVt_PV))
4608                     weight -= 100;
4609                 else    /* Not a multi-char identifier already known in the
4610                            program; is somewhat likely to be a subscript */
4611                     weight -= 10;
4612             }
4613             else if (   s[0] == '$'
4614                      && s[1]
4615                      && memCHRs("[#!%*<>()-=", s[1]))
4616             {
4617                 /* Here we have what could be a punctuation variable.  If the
4618                  * next character after it is a closing bracket, it makes it
4619                  * quite likely to be that, and hence a subscript.  If it is
4620                  * something else, more mildly a subscript */
4621                 if (/*{*/ memCHRs("])} =", s[2]))
4622                     weight -= 10;
4623                 else
4624                     weight -= 1;
4625             }
4626             break;
4627
4628           case '\\':
4629             if (s[1]) {
4630                 if (memCHRs("wds]", s[1]))
4631                     weight += 100;  /* \w \d \s => strongly charclass */
4632                     /* khw: Why not \W \D \S \h \v, etc as well? */
4633                 else if (seen[(U8)'\''] || seen[(U8)'"'])
4634                     weight += 1;    /* \' => mildly charclass */
4635                 else if (memCHRs("abcfnrtvx", s[1]))
4636                     weight += 40;   /* \n, etc => charclass */
4637                     /* khw: Why not \e etc as well? */
4638                 else if (isDIGIT(s[1])) {
4639                     weight += 40;   /* \123 => charclass */
4640                     while (s[1] && isDIGIT(s[1]))
4641                         s++;
4642                 }
4643             }
4644             else /* \ followed by NUL strongly indicates character class */
4645                 weight += 100;
4646             break;
4647
4648           case '-':
4649             /* If it is something like '-\', it is more likely to be a
4650              * character class.
4651              *
4652              * khw: The rest of the conditionals in this 'case' really should
4653              * be subject to an 'else' of this condition */
4654             if (s[1] == '\\')
4655                 weight += 50;
4656
4657             /* If it is something like 'a-' or '0-', it is more likely to
4658              * be a character class. '!' is the first ASCII graphic, so '!-'
4659              * would be the start of a range of graphics. */
4660             if (! first_time && memCHRs("aA01! ", prev_un_char))
4661                 weight += 30;
4662
4663             /* If it is something like '-Z' or '-7' (for octal) or '-9' it
4664              * is more likely to be a character class. '~' is the final ASCII
4665              * graphic, so '-~' would be the end of a range of graphics.
4666              *
4667              * khw: Having [-z] really doesn't imply what the comments above
4668              * indicate, so this should only be tested when '! first_time' */
4669             if (memCHRs("zZ79~", s[1]))
4670                 weight += 30;
4671
4672             /* If it is something like -1 or -$foo, it is more likely to be a
4673              * subscript.  */
4674             if (first_time && (isDIGIT(s[1]) || s[1] == '$')) {
4675                 weight -= 5;    /* cope with negative subscript */
4676             }
4677             break;
4678
4679           default:
4680             if (  (first_time || (  ! isWORDCHAR(prev_un_char)
4681                                   &&  prev_un_char != '$'
4682                                   &&  prev_un_char != '@'
4683                                   &&  prev_un_char != '&'))
4684                 && isALPHA(s[0])
4685                 && isALPHA(s[1]))
4686             {
4687                 /* Here it's \W (that isn't [$@&] ) followed immediately by two
4688                  * alphas in a row.  Accumulate all the consecutive alphas */
4689                 char *d = s;
4690                 while (isALPHA(s[0]))
4691                     s++;
4692
4693                 /* If those alphas spell a keyword, it's almost certainly not a
4694                  * character class */
4695                 if (keyword(d, s - d, 0))
4696                     weight -= 150;
4697
4698                 /* khw: Should those alphas be marked as seen? */
4699             }
4700
4701             /* Consecutive chars like [...12...] and [...ab...] are presumed
4702              * more likely to be character classes */
4703             if (    ! first_time
4704                 && (   NATIVE_TO_LATIN1(un_char)
4705                     == NATIVE_TO_LATIN1(prev_un_char) + 1))
4706             {
4707                 weight += 5;
4708             }
4709
4710             /* But repeating a character inside a character class does nothing,
4711              * like [aba], so less likely that someone makes such a class, more
4712              * likely that it is a subscript; the more repeats, the less
4713              * likely. */
4714             weight -= seen[un_char];
4715             break;
4716         }   /* End of switch */
4717
4718         /* khw: 'seen' is declared as a char.  This ++ can cause it to wrap.
4719          * This gives different results with compilers for which a plain 'char'
4720          * is actually unsigned, versus those where it is signed.  I believe it
4721          * is undefined behavior to wrap a 'signed'.  I think it should be
4722          * instead declared an unsigned int to make the chances of wrapping
4723          * essentially zero.
4724          *
4725          * And I believe that extra backslashes are different from other
4726          * repeated characters. */
4727         seen[un_char]++;
4728     }   /* End of loop through each character of the construct */
4729
4730     if (weight >= 0)    /* probably a character class */
4731         return FALSE;
4732
4733     return TRUE;
4734 }
4735
4736 /*
4737  * S_intuit_method
4738  *
4739  * Does all the checking to disambiguate
4740  *   foo bar
4741  * between foo(bar) and bar->foo.  Returns 0 if not a method, otherwise
4742  * METHCALL (bar->foo(args)) or METHCALL0 (bar->foo args).
4743  *
4744  * First argument is the stuff after the first token, e.g. "bar".
4745  *
4746  * Not a method if foo is a filehandle.
4747  * Not a method if foo is a subroutine prototyped to take a filehandle.
4748  * Not a method if it's really "Foo $bar"
4749  * Method if it's "foo $bar"
4750  * Not a method if it's really "print foo $bar"
4751  * Method if it's really "foo package::" (interpreted as package->foo)
4752  * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4753  * Not a method if bar is a filehandle or package, but is quoted with
4754  *   =>
4755  */
4756
4757 STATIC int
4758 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4759 {
4760     char *s = start + (*start == '$');
4761     char tmpbuf[sizeof PL_tokenbuf];
4762     STRLEN len;
4763     GV* indirgv;
4764         /* Mustn't actually add anything to a symbol table.
4765            But also don't want to "initialise" any placeholder
4766            constants that might already be there into full
4767            blown PVGVs with attached PVCV.  */
4768     GV * const gv =
4769         ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4770
4771     PERL_ARGS_ASSERT_INTUIT_METHOD;
4772
4773     if (!FEATURE_INDIRECT_IS_ENABLED)
4774         return 0;
4775
4776     if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4777             return 0;
4778     if (cv && SvPOK(cv)) {
4779         const char *proto = CvPROTO(cv);
4780         if (proto) {
4781             while (*proto && (isSPACE(*proto) || *proto == ';'))
4782                 proto++;
4783             if (*proto == '*')
4784                 return 0;
4785         }
4786     }
4787
4788     if (*start == '$') {
4789         SSize_t start_off = start - SvPVX(PL_linestr);
4790         if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4791             || isUPPER(*PL_tokenbuf))
4792             return 0;
4793         /* this could be $# */
4794         if (isSPACE(*s))
4795             s = skipspace(s);
4796         PL_bufptr = SvPVX(PL_linestr) + start_off;
4797         PL_expect = XREF;
4798         return *s == '(' ? METHCALL : METHCALL0;
4799     }
4800
4801     s = scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
4802     /* start is the beginning of the possible filehandle/object,
4803      * and s is the end of it
4804      * tmpbuf is a copy of it (but with single quotes as double colons)
4805      */
4806
4807     if (!keyword(tmpbuf, len, 0)) {
4808         if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4809             len -= 2;
4810             tmpbuf[len] = '\0';
4811             goto bare_package;
4812         }
4813         indirgv = gv_fetchpvn_flags(tmpbuf, len,
4814                                     GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4815                                     SVt_PVCV);
4816         if (indirgv && SvTYPE(indirgv) != SVt_NULL
4817          && (!isGV(indirgv) || GvCVu(indirgv)))
4818             return 0;
4819         /* filehandle or package name makes it a method */
4820         if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4821             s = skipspace(s);
4822             if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4823                 return 0;       /* no assumptions -- "=>" quotes bareword */
4824       bare_package:
4825             NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4826                                                   S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4827             NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4828             PL_expect = XTERM;
4829             force_next(BAREWORD);
4830             PL_bufptr = s;
4831             return *s == '(' ? METHCALL : METHCALL0;
4832         }
4833     }
4834     return 0;
4835 }
4836
4837 /* Encoded script support. filter_add() effectively inserts a
4838  * 'pre-processing' function into the current source input stream.
4839  * Note that the filter function only applies to the current source file
4840  * (e.g., it will not affect files 'require'd or 'use'd by this one).
4841  *
4842  * The datasv parameter (which may be NULL) can be used to pass
4843  * private data to this instance of the filter. The filter function
4844  * can recover the SV using the FILTER_DATA macro and use it to
4845  * store private buffers and state information.
4846  *
4847  * The supplied datasv parameter is upgraded to a PVIO type
4848  * and the IoDIRP/IoANY field is used to store the function pointer,
4849  * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4850  * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4851  * private use must be set using malloc'd pointers.
4852  */
4853
4854 SV *
4855 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4856 {
4857     if (!funcp)
4858         return NULL;
4859
4860     if (!PL_parser)
4861         return NULL;
4862
4863     if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4864         Perl_croak(aTHX_ "Source filters apply only to byte streams");
4865
4866     if (!PL_rsfp_filters)
4867         PL_rsfp_filters = newAV();
4868     if (!datasv)
4869         datasv = newSV(0);
4870     SvUPGRADE(datasv, SVt_PVIO);
4871     IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4872     IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4873     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4874                           FPTR2DPTR(void *, IoANY(datasv)),
4875                           SvPV_nolen(datasv)));
4876     av_unshift(PL_rsfp_filters, 1);
4877     av_store(PL_rsfp_filters, 0, datasv) ;
4878     if (
4879         !PL_parser->filtered
4880      && PL_parser->lex_flags & LEX_EVALBYTES
4881      && PL_bufptr < PL_bufend
4882     ) {
4883         const char *s = PL_bufptr;
4884         while (s < PL_bufend) {
4885             if (*s == '\n') {
4886                 SV *linestr = PL_parser->linestr;
4887                 char *buf = SvPVX(linestr);
4888                 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4889                 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4890                 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4891                 STRLEN const linestart_pos = PL_parser->linestart - buf;
4892                 STRLEN const last_uni_pos =
4893                     PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4894                 STRLEN const last_lop_pos =
4895                     PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4896                 av_push(PL_rsfp_filters, linestr);
4897                 PL_parser->linestr =
4898                     newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4899                 buf = SvPVX(PL_parser->linestr);
4900                 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4901                 PL_parser->bufptr = buf + bufptr_pos;
4902                 PL_parser->oldbufptr = buf + oldbufptr_pos;
4903                 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4904                 PL_parser->linestart = buf + linestart_pos;
4905                 if (PL_parser->last_uni)
4906                     PL_parser->last_uni = buf + last_uni_pos;
4907                 if (PL_parser->last_lop)
4908                     PL_parser->last_lop = buf + last_lop_pos;
4909                 SvLEN_set(linestr, SvCUR(linestr));
4910                 SvCUR_set(linestr, s - SvPVX(linestr));
4911                 PL_parser->filtered = 1;
4912                 break;
4913             }
4914             s++;
4915         }
4916     }
4917     return(datasv);
4918 }
4919
4920 /*
4921 =for apidoc_section $filters
4922 =for apidoc filter_del
4923
4924 Delete most recently added instance of the filter function argument
4925
4926 =cut
4927 */
4928
4929 void
4930 Perl_filter_del(pTHX_ filter_t funcp)
4931 {
4932     SV *datasv;
4933
4934     PERL_ARGS_ASSERT_FILTER_DEL;
4935
4936 #ifdef DEBUGGING
4937     DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4938                           FPTR2DPTR(void*, funcp)));
4939 #endif
4940     if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4941         return;
4942     /* if filter is on top of stack (usual case) just pop it off */
4943     datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4944     if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4945         SvREFCNT_dec(av_pop(PL_rsfp_filters));
4946
4947         return;
4948     }
4949     /* we need to search for the correct entry and clear it     */
4950     Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4951 }
4952
4953
4954 /* Invoke the idxth filter function for the current rsfp.        */
4955 /* maxlen 0 = read one text line */
4956 I32
4957 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4958 {
4959     filter_t funcp;
4960     I32 ret;
4961     SV *datasv = NULL;
4962     /* This API is bad. It should have been using unsigned int for maxlen.
4963        Not sure if we want to change the API, but if not we should sanity
4964        check the value here.  */
4965     unsigned int correct_length = maxlen < 0 ?  PERL_INT_MAX : maxlen;
4966
4967     PERL_ARGS_ASSERT_FILTER_READ;
4968
4969     if (!PL_parser || !PL_rsfp_filters)
4970         return -1;
4971     if (idx > AvFILLp(PL_rsfp_filters)) {       /* Any more filters?    */
4972         /* Provide a default input filter to make life easy.    */
4973         /* Note that we append to the line. This is handy.      */
4974         DEBUG_P(PerlIO_printf(Perl_debug_log,
4975                               "filter_read %d: from rsfp\n", idx));
4976         if (correct_length) {
4977             /* Want a block */
4978             int len ;
4979             const int old_len = SvCUR(buf_sv);
4980
4981             /* ensure buf_sv is large enough */
4982             SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4983             if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4984                                    correct_length)) <= 0) {
4985                 if (PerlIO_error(PL_rsfp))
4986                     return -1;          /* error */
4987                 else
4988                     return 0 ;          /* end of file */
4989             }
4990             SvCUR_set(buf_sv, old_len + len) ;
4991             SvPVX(buf_sv)[old_len + len] = '\0';
4992         } else {
4993             /* Want a line */
4994             if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4995                 if (PerlIO_error(PL_rsfp))
4996                     return -1;          /* error */
4997                 else
4998                     return 0 ;          /* end of file */
4999             }
5000         }
5001         return SvCUR(buf_sv);
5002     }
5003     /* Skip this filter slot if filter has been deleted */
5004     if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
5005         DEBUG_P(PerlIO_printf(Perl_debug_log,
5006                               "filter_read %d: skipped (filter deleted)\n",
5007                               idx));
5008         return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
5009     }
5010     if (SvTYPE(datasv) != SVt_PVIO) {
5011         if (correct_length) {
5012             /* Want a block */
5013             const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
5014             if (!remainder) return 0; /* eof */
5015             if (correct_length > remainder) correct_length = remainder;
5016             sv_catpvn(buf_sv, SvEND(datasv), correct_length);
5017             SvCUR_set(datasv, SvCUR(datasv) + correct_length);
5018         } else {
5019             /* Want a line */
5020             const char *s = SvEND(datasv);
5021             const char *send = SvPVX(datasv) + SvLEN(datasv);
5022             while (s < send) {
5023                 if (*s == '\n') {
5024                     s++;
5025                     break;
5026                 }
5027                 s++;
5028             }
5029             if (s == send) return 0; /* eof */
5030             sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
5031             SvCUR_set(datasv, s-SvPVX(datasv));
5032         }
5033         return SvCUR(buf_sv);
5034     }
5035     /* Get function pointer hidden within datasv        */
5036     funcp = DPTR2FPTR(filter_t, IoANY(datasv));
5037     DEBUG_P(PerlIO_printf(Perl_debug_log,
5038                           "filter_read %d: via function %p (%s)\n",
5039                           idx, (void*)datasv, SvPV_nolen_const(datasv)));
5040     /* Call function. The function is expected to       */
5041     /* call "FILTER_READ(idx+1, buf_sv)" first.         */
5042     /* Return: <0:error, =0:eof, >0:not eof             */
5043     ENTER;
5044     save_scalar(PL_errgv);
5045
5046     /* although this calls out to a random C function, there's a good
5047      * chance that that function will call back into perl (e.g. using
5048      * Filter::Util::Call). So downgrade the stack to
5049      * non-reference-counted for backwards compatibility - i.e. do the
5050      * equivalent of xs_wrap(), but this time we know there are no
5051      * args to be passed or returned on the stack, simplifying it.
5052      */
5053 #ifdef PERL_RC_STACK
5054     assert(AvREAL(PL_curstack));
5055     I32 oldbase = PL_curstackinfo->si_stack_nonrc_base;
5056     I32 oldsp   = PL_stack_sp - PL_stack_base;
5057     if (!oldbase)
5058         PL_curstackinfo->si_stack_nonrc_base = oldsp + 1;
5059 #endif
5060
5061     ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
5062
5063 #ifdef PERL_RC_STACK
5064     assert(oldsp == PL_stack_sp - PL_stack_base);
5065     assert(AvREAL(PL_curstack));
5066     assert(PL_curstackinfo->si_stack_nonrc_base ==
5067                                         oldbase ? oldbase : oldsp + 1);
5068     PL_curstackinfo->si_stack_nonrc_base = oldbase;
5069 #endif
5070
5071     LEAVE;
5072     return ret;
5073 }
5074
5075 STATIC char *
5076 S_filter_gets(pTHX_ SV *sv, STRLEN append)
5077 {
5078     PERL_ARGS_ASSERT_FILTER_GETS;
5079
5080 #ifdef PERL_CR_FILTER
5081     if (!PL_rsfp_filters) {
5082         filter_add(S_cr_textfilter,NULL);
5083     }
5084 #endif
5085     if (PL_rsfp_filters) {
5086         if (!append)
5087             SvCUR_set(sv, 0);   /* start with empty line        */
5088         if (FILTER_READ(0, sv, 0) > 0)
5089             return ( SvPVX(sv) ) ;
5090         else
5091             return NULL ;
5092     }
5093     else
5094         return (sv_gets(sv, PL_rsfp, append));
5095 }
5096
5097 STATIC HV *
5098 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
5099 {
5100     GV *gv;
5101
5102     PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
5103
5104     if (memEQs(pkgname, len, "__PACKAGE__"))
5105         return PL_curstash;
5106
5107     if (len > 2
5108         && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
5109         && (gv = gv_fetchpvn_flags(pkgname,
5110                                    len,
5111                                    ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
5112     {
5113         return GvHV(gv);                        /* Foo:: */
5114     }
5115
5116     /* use constant CLASS => 'MyClass' */
5117     gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
5118     if (gv && GvCV(gv)) {
5119         SV * const sv = cv_const_sv(GvCV(gv));
5120         if (sv)
5121             return gv_stashsv(sv, 0);
5122     }
5123
5124     return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
5125 }
5126
5127
5128 STATIC char *
5129 S_tokenize_use(pTHX_ int is_use, char *s) {
5130     PERL_ARGS_ASSERT_TOKENIZE_USE;
5131
5132     if (PL_expect != XSTATE)
5133         /* diag_listed_as: "use" not allowed in expression */
5134         yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
5135                     is_use ? "use" : "no"));
5136     PL_expect = XTERM;
5137     s = skipspace(s);
5138     if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
5139         s = force_version(s, TRUE);
5140         if (*s == ';' || *s == '}'
5141                 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
5142             NEXTVAL_NEXTTOKE.opval = NULL;
5143             force_next(BAREWORD);
5144         }
5145         else if (*s == 'v') {
5146             s = force_word(s,BAREWORD,FALSE,TRUE);
5147             s = force_version(s, FALSE);
5148         }
5149     }
5150     else {
5151         s = force_word(s,BAREWORD,FALSE,TRUE);
5152         s = force_version(s, FALSE);
5153     }
5154     pl_yylval.ival = is_use;
5155     return s;
5156 }
5157 #ifdef DEBUGGING
5158     static const char* const exp_name[] =
5159         { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
5160           "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
5161           "SIGVAR", "TERMORDORDOR"
5162         };
5163 #endif
5164
5165 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
5166 STATIC bool
5167 S_word_takes_any_delimiter(char *p, STRLEN len)
5168 {
5169     return (len == 1 && memCHRs("msyq", p[0]))
5170             || (len == 2
5171                 && ((p[0] == 't' && p[1] == 'r')
5172                     || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
5173 }
5174
5175 static void
5176 S_check_scalar_slice(pTHX_ char *s)
5177 {
5178     s++;
5179     while (SPACE_OR_TAB(*s)) s++;
5180     if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
5181                                                              PL_bufend,
5182                                                              UTF))
5183     {
5184         return;
5185     }
5186     while (    isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
5187            || (*s && memCHRs(" \t$#+-'\"", *s)))
5188     {
5189         s += UTF ? UTF8SKIP(s) : 1;
5190     }
5191     if (*s == '}' || *s == ']')
5192         pl_yylval.ival = OPpSLICEWARNING;
5193 }
5194
5195 #define lex_token_boundary() S_lex_token_boundary(aTHX)
5196 static void
5197 S_lex_token_boundary(pTHX)
5198 {
5199     PL_oldoldbufptr = PL_oldbufptr;
5200     PL_oldbufptr = PL_bufptr;
5201 }
5202
5203 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
5204 static char *
5205 S_vcs_conflict_marker(pTHX_ char *s)
5206 {
5207     lex_token_boundary();
5208     PL_bufptr = s;
5209     yyerror("Version control conflict marker");
5210     while (s < PL_bufend && *s != '\n')
5211         s++;
5212     return s;
5213 }
5214
5215 static int
5216 yyl_sigvar(pTHX_ char *s)
5217 {
5218     /* we expect the sigil and optional var name part of a
5219      * signature element here. Since a '$' is not necessarily
5220      * followed by a var name, handle it specially here; the general
5221      * yylex code would otherwise try to interpret whatever follows
5222      * as a var; e.g. ($, ...) would be seen as the var '$,'
5223      */
5224
5225     U8 sigil;
5226
5227     s = skipspace(s);
5228     sigil = *s++;
5229     PL_bufptr = s; /* for error reporting */
5230     switch (sigil) {
5231     case '$':
5232     case '@':
5233     case '%':
5234         /* spot stuff that looks like an prototype */
5235         if (memCHRs("$:@%&*;\\[]", *s)) {
5236             yyerror("Illegal character following sigil in a subroutine signature");
5237             break;
5238         }
5239         /* '$#' is banned, while '$ # comment' isn't */
5240         if (*s == '#') {
5241             yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5242             break;
5243         }
5244         s = skipspace(s);
5245         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5246             char *dest = PL_tokenbuf + 1;
5247             /* read var name, including sigil, into PL_tokenbuf */
5248             PL_tokenbuf[0] = sigil;
5249             parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5250                 0, cBOOL(UTF), FALSE, FALSE);
5251             *dest = '\0';
5252             assert(PL_tokenbuf[1]); /* we have a variable name */
5253         }
5254         else {
5255             *PL_tokenbuf = 0;
5256             PL_in_my = 0;
5257         }
5258
5259         s = skipspace(s);
5260         /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5261          * as the ASSIGNOP, and exclude other tokens that start with =
5262          */
5263         if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
5264             /* save now to report with the same context as we did when
5265              * all ASSIGNOPS were accepted */
5266             PL_oldbufptr = s;
5267
5268             ++s;
5269             NEXTVAL_NEXTTOKE.ival = OP_SASSIGN;
5270             force_next(ASSIGNOP);
5271             PL_expect = XTERM;
5272         }
5273         else if(*s == '/' && s[1] == '/' && s[2] == '=') {
5274             PL_oldbufptr = s;
5275
5276             s += 3;
5277             NEXTVAL_NEXTTOKE.ival = OP_DORASSIGN;
5278             force_next(ASSIGNOP);
5279             PL_expect = XTERM;
5280         }
5281         else if(*s == '|' && s[1] == '|' && s[2] == '=') {
5282             PL_oldbufptr = s;
5283
5284             s += 3;
5285             NEXTVAL_NEXTTOKE.ival = OP_ORASSIGN;
5286             force_next(ASSIGNOP);
5287             PL_expect = XTERM;
5288         }
5289         else if (*s == ',' || *s == ')') {
5290             PL_expect = XOPERATOR;
5291         }
5292         else {
5293             /* make sure the context shows the unexpected character and
5294              * hopefully a bit more */
5295             if (*s) ++s;
5296             while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5297                 s++;
5298             PL_bufptr = s; /* for error reporting */
5299             yyerror("Illegal operator following parameter in a subroutine signature");
5300             PL_in_my = 0;
5301         }
5302         if (*PL_tokenbuf) {
5303             NEXTVAL_NEXTTOKE.ival = sigil;
5304             force_next('p'); /* force a signature pending identifier */
5305         }
5306         break;
5307
5308     case ')':
5309         PL_expect = XBLOCK;
5310         break;
5311     case ',': /* handle ($a,,$b) */
5312         break;
5313
5314     default:
5315         PL_in_my = 0;
5316         yyerror("A signature parameter must start with '$', '@' or '%'");
5317         /* very crude error recovery: skip to likely next signature
5318          * element */
5319         while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5320             s++;
5321         break;
5322     }
5323
5324     switch (sigil) {
5325         case ',': TOKEN (PERLY_COMMA);
5326         case '$': TOKEN (PERLY_DOLLAR);
5327         case '@': TOKEN (PERLY_SNAIL);
5328         case '%': TOKEN (PERLY_PERCENT_SIGN);
5329         case ')': TOKEN (PERLY_PAREN_CLOSE);
5330         default:  TOKEN (sigil);
5331     }
5332 }
5333
5334 static int
5335 yyl_dollar(pTHX_ char *s)
5336 {
5337     CLINE;
5338
5339     if (PL_expect == XPOSTDEREF) {
5340         if (s[1] == '#') {
5341             s++;
5342             POSTDEREF(DOLSHARP);
5343         }
5344         POSTDEREF(PERLY_DOLLAR);
5345     }
5346
5347     if (   s[1] == '#'
5348         && (   isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5349             || memCHRs("{$:+-@", s[2])))
5350     {
5351         PL_tokenbuf[0] = '@';
5352         s = scan_ident(s + 1, PL_tokenbuf + 1,
5353                        sizeof PL_tokenbuf - 1, FALSE);
5354         if (PL_expect == XOPERATOR) {
5355             char *d = s;
5356             if (PL_bufptr > s) {
5357                 d = PL_bufptr-1;
5358                 PL_bufptr = PL_oldbufptr;
5359             }
5360             no_op("Array length", d);
5361         }
5362         if (!PL_tokenbuf[1])
5363             PREREF(DOLSHARP);
5364         PL_expect = XOPERATOR;
5365         force_ident_maybe_lex('#');
5366         TOKEN(DOLSHARP);
5367     }
5368
5369     PL_tokenbuf[0] = '$';
5370     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5371     if (PL_expect == XOPERATOR) {
5372         char *d = s;
5373         if (PL_bufptr > s) {
5374             d = PL_bufptr-1;
5375             PL_bufptr = PL_oldbufptr;
5376         }
5377         no_op("Scalar", d);
5378     }
5379     if (!PL_tokenbuf[1]) {
5380         if (s == PL_bufend)
5381             yyerror("Final $ should be \\$ or $name");
5382         PREREF(PERLY_DOLLAR);
5383     }
5384
5385     {
5386         const char tmp = *s;
5387         if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5388             s = skipspace(s);
5389
5390         if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5391             && intuit_more(s, PL_bufend)) {
5392             if (*s == '[') {
5393                 PL_tokenbuf[0] = '@';
5394                 if (ckWARN(WARN_SYNTAX)) {
5395                     char *t = s+1;
5396
5397                     while ( t < PL_bufend ) {
5398                         if (isSPACE(*t)) {
5399                             do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5400                             /* consumed one or more space chars */
5401                         } else if (*t == '$' || *t == '@') {
5402                             /* could be more than one '$' like $$ref or @$ref */
5403                             do { t++; } while (t < PL_bufend && *t == '$');
5404
5405                             /* could be an abigail style identifier like $ foo */
5406                             while (t < PL_bufend && *t == ' ') t++;
5407
5408                             /* strip off the name of the var */
5409                             while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5410                                 t += UTF ? UTF8SKIP(t) : 1;
5411                             /* consumed a varname */
5412                         } else if (isDIGIT(*t)) {
5413                             /* deal with hex constants like 0x11 */
5414                             if (t[0] == '0' && t[1] == 'x') {
5415                                 t += 2;
5416                                 while (t < PL_bufend && isXDIGIT(*t)) t++;
5417                             } else {
5418                                 /* deal with decimal/octal constants like 1 and 0123 */
5419                                 do { t++; } while (isDIGIT(*t));
5420                                 if (t<PL_bufend && *t == '.') {
5421                                     do { t++; } while (isDIGIT(*t));
5422                                 }
5423                             }
5424                             /* consumed a number */
5425                         } else {
5426                             /* not a var nor a space nor a number */
5427                             break;
5428                         }
5429                     }
5430                     if (t < PL_bufend && *t++ == ',') {
5431                         PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5432                         while (t < PL_bufend && *t != ']')
5433                             t++;
5434                         Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5435                                     "Multidimensional syntax %" UTF8f " not supported",
5436                                     UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5437                     }
5438                 }
5439             }
5440             else if (*s == '{') {
5441                 char *t;
5442                 PL_tokenbuf[0] = '%';
5443                 if (    strEQ(PL_tokenbuf+1, "SIG")
5444                     && ckWARN(WARN_SYNTAX)
5445                     && (t = (char *) memchr(s, '}', PL_bufend - s))
5446                     && (t = (char *) memchr(t, '=', PL_bufend - t)))
5447                 {
5448                     char tmpbuf[sizeof PL_tokenbuf];
5449                     do {
5450                         t++;
5451                     } while (isSPACE(*t));
5452                     if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5453                         STRLEN len;
5454                         t = scan_word6(t, tmpbuf, sizeof tmpbuf, TRUE,
5455                                       &len, TRUE);
5456                         while (isSPACE(*t))
5457                             t++;
5458                         if (  *t == ';'
5459                             && get_cvn_flags(tmpbuf, len, UTF
5460                                                             ? SVf_UTF8
5461                                                             : 0))
5462                         {
5463                             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5464                                 "You need to quote \"%" UTF8f "\"",
5465                                     UTF8fARG(UTF, len, tmpbuf));
5466                         }
5467                     }
5468                 }
5469             }
5470         }
5471
5472         PL_expect = XOPERATOR;
5473         if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5474             const bool islop = (PL_last_lop == PL_oldoldbufptr);
5475             if (!islop || PL_last_lop_op == OP_GREPSTART)
5476                 PL_expect = XOPERATOR;
5477             else if (memCHRs("$@\"'`q", *s))
5478                 PL_expect = XTERM;              /* e.g. print $fh "foo" */
5479             else if (   memCHRs("&*<%", *s)
5480                      && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5481             {
5482                 PL_expect = XTERM;              /* e.g. print $fh &sub */
5483             }
5484             else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5485                 char tmpbuf[sizeof PL_tokenbuf];
5486                 int t2;
5487                 STRLEN len;
5488                 scan_word6(s, tmpbuf, sizeof tmpbuf, TRUE, &len, FALSE);
5489                 if ((t2 = keyword(tmpbuf, len, 0))) {
5490                     /* binary operators exclude handle interpretations */
5491                     switch (t2) {
5492                     case -KEY_x:
5493                     case -KEY_eq:
5494                     case -KEY_ne:
5495                     case -KEY_gt:
5496                     case -KEY_lt:
5497                     case -KEY_ge:
5498                     case -KEY_le:
5499                     case -KEY_cmp:
5500                         break;
5501                     default:
5502                         PL_expect = XTERM;      /* e.g. print $fh length() */
5503                         break;
5504                     }
5505                 }
5506                 else {
5507                     PL_expect = XTERM;  /* e.g. print $fh subr() */
5508                 }
5509             }
5510             else if (isDIGIT(*s))
5511                 PL_expect = XTERM;              /* e.g. print $fh 3 */
5512             else if (*s == '.' && isDIGIT(s[1]))
5513                 PL_expect = XTERM;              /* e.g. print $fh .3 */
5514             else if ((*s == '?' || *s == '-' || *s == '+')
5515                 && !isSPACE(s[1]) && s[1] != '=')
5516                 PL_expect = XTERM;              /* e.g. print $fh -1 */
5517             else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5518                      && s[1] != '/')
5519                 PL_expect = XTERM;              /* e.g. print $fh /.../
5520                                                XXX except DORDOR operator
5521                                             */
5522             else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5523                      && s[2] != '=')
5524                 PL_expect = XTERM;              /* print $fh <<"EOF" */
5525         }
5526     }
5527     force_ident_maybe_lex('$');
5528     TOKEN(PERLY_DOLLAR);
5529 }
5530
5531 static int
5532 yyl_sub(pTHX_ char *s, const int key)
5533 {
5534     char * const tmpbuf = PL_tokenbuf + 1;
5535     bool have_name, have_proto;
5536     STRLEN len;
5537     SV *format_name = NULL;
5538     bool is_method = (key == KEY_method);
5539
5540     /* method always implies signatures */
5541     bool is_sigsub = is_method || FEATURE_SIGNATURES_IS_ENABLED;
5542
5543     SSize_t off = s-SvPVX(PL_linestr);
5544     char *d;
5545
5546     s = skipspace(s); /* can move PL_linestr */
5547
5548     d = SvPVX(PL_linestr)+off;
5549
5550     SAVEBOOL(PL_parser->sig_seen);
5551     PL_parser->sig_seen = FALSE;
5552
5553     if (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5554         || *s == '\''
5555         || (*s == ':' && s[1] == ':'))
5556     {
5557
5558         PL_expect = XATTRBLOCK;
5559         d = scan_word6(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5560                       &len, TRUE);
5561         if (key == KEY_format)
5562             format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5563         *PL_tokenbuf = '&';
5564         if (memchr(tmpbuf, ':', len) || key != KEY_sub
5565          || pad_findmy_pvn(
5566                 PL_tokenbuf, len + 1, 0
5567             ) != NOT_IN_PAD)
5568             sv_setpvn(PL_subname, tmpbuf, len);
5569         else {
5570             sv_setsv(PL_subname,PL_curstname);
5571             sv_catpvs(PL_subname,"::");
5572             sv_catpvn(PL_subname,tmpbuf,len);
5573         }
5574         if (SvUTF8(PL_linestr))
5575             SvUTF8_on(PL_subname);
5576         have_name = TRUE;
5577
5578         s = skipspace(d);
5579     }
5580     else {
5581         if (key == KEY_my || key == KEY_our || key==KEY_state) {
5582             *d = '\0';
5583             /* diag_listed_as: Missing name in "%s sub" */
5584             Perl_croak(aTHX_
5585                       "Missing name in \"%s\"", PL_bufptr);
5586         }
5587         PL_expect = XATTRTERM;
5588         sv_setpvs(PL_subname,"?");
5589         have_name = FALSE;
5590     }
5591
5592     if (key == KEY_format) {
5593         if (format_name) {
5594             NEXTVAL_NEXTTOKE.opval
5595                 = newSVOP(OP_CONST,0, format_name);
5596             NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5597             force_next(BAREWORD);
5598         }
5599         PREBLOCK(KW_FORMAT);
5600     }
5601
5602     /* Look for a prototype */
5603     if (*s == '(' && !is_sigsub) {
5604         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5605         if (!s)
5606             Perl_croak(aTHX_ "Prototype not terminated");
5607         COPLINE_SET_FROM_MULTI_END;
5608         (void)validate_proto(PL_subname, PL_lex_stuff,
5609                              ckWARN(WARN_ILLEGALPROTO), 0);
5610         have_proto = TRUE;
5611
5612         s = skipspace(s);
5613     }
5614     else
5615         have_proto = FALSE;
5616
5617     if (  !(*s == ':' && s[1] != ':')
5618         && (*s != '{' && *s != '(') && key != KEY_format)
5619     {
5620         assert(key == KEY_sub || key == KEY_method ||
5621                key == KEY_AUTOLOAD || key == KEY_DESTROY ||
5622                key == KEY_BEGIN || key == KEY_UNITCHECK || key == KEY_CHECK ||
5623                key == KEY_INIT || key == KEY_END ||
5624                key == KEY_my || key == KEY_state ||
5625                key == KEY_our);
5626         if (!have_name)
5627             Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5628         else if (*s != ';' && *s != '}')
5629             Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5630     }
5631
5632     if (have_proto) {
5633         NEXTVAL_NEXTTOKE.opval =
5634             newSVOP(OP_CONST, 0, PL_lex_stuff);
5635         PL_lex_stuff = NULL;
5636         force_next(THING);
5637     }
5638
5639     if (!have_name) {
5640         if (PL_curstash)
5641             sv_setpvs(PL_subname, "__ANON__");
5642         else
5643             sv_setpvs(PL_subname, "__ANON__::__ANON__");
5644         if (is_method)
5645             TOKEN(KW_METHOD_anon);
5646         else if (is_sigsub)
5647             TOKEN(KW_SUB_anon_sig);
5648         else
5649             TOKEN(KW_SUB_anon);
5650     }
5651     force_ident_maybe_lex('&');
5652     if (is_method)
5653         TOKEN(KW_METHOD_named);
5654     else if (is_sigsub)
5655         TOKEN(KW_SUB_named_sig);
5656     else
5657         TOKEN(KW_SUB_named);
5658 }
5659
5660 static int
5661 yyl_interpcasemod(pTHX_ char *s)
5662 {
5663 #ifdef DEBUGGING
5664     if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5665         Perl_croak(aTHX_
5666                    "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5667                    PL_bufptr, PL_bufend, *PL_bufptr);
5668 #endif
5669
5670     if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5671         /* if at a \E */
5672         if (PL_lex_casemods) {
5673             const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5674             PL_lex_casestack[PL_lex_casemods] = '\0';
5675
5676             if (PL_bufptr != PL_bufend
5677                 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5678                     || oldmod == 'F')) {
5679                 PL_bufptr += 2;
5680                 PL_lex_state = LEX_INTERPCONCAT;
5681             }
5682             PL_lex_allbrackets--;
5683             return REPORT(PERLY_PAREN_CLOSE);
5684         }
5685         else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5686            /* Got an unpaired \E */
5687            Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5688                     "Useless use of \\E");
5689         }
5690         if (PL_bufptr != PL_bufend)
5691             PL_bufptr += 2;
5692         PL_lex_state = LEX_INTERPCONCAT;
5693         return yylex();
5694     }
5695     else {
5696         DEBUG_T({
5697             PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5698         });
5699         s = PL_bufptr + 1;
5700         if (s[1] == '\\' && s[2] == 'E') {
5701             PL_bufptr = s + 3;
5702             PL_lex_state = LEX_INTERPCONCAT;
5703             return yylex();
5704         }
5705         else {
5706             I32 tmp;
5707             if (   memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5708                 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5709             {
5710                 tmp = *s, *s = s[2], s[2] = (char)tmp;  /* misordered... */
5711             }
5712             if ((*s == 'L' || *s == 'U' || *s == 'F')
5713                 && (strpbrk(PL_lex_casestack, "LUF")))
5714             {
5715                 PL_lex_casestack[--PL_lex_casemods] = '\0';
5716                 PL_lex_allbrackets--;
5717                 return REPORT(PERLY_PAREN_CLOSE);
5718             }
5719             if (PL_lex_casemods > 10)
5720                 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5721             PL_lex_casestack[PL_lex_casemods++] = *s;
5722             PL_lex_casestack[PL_lex_casemods] = '\0';
5723             PL_lex_state = LEX_INTERPCONCAT;
5724             NEXTVAL_NEXTTOKE.ival = 0;
5725             force_next((2<<24)|PERLY_PAREN_OPEN);
5726             if (*s == 'l')
5727                 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5728             else if (*s == 'u')
5729                 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5730             else if (*s == 'L')
5731                 NEXTVAL_NEXTTOKE.ival = OP_LC;
5732             else if (*s == 'U')
5733                 NEXTVAL_NEXTTOKE.ival = OP_UC;
5734             else if (*s == 'Q')
5735                 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5736             else if (*s == 'F')
5737                 NEXTVAL_NEXTTOKE.ival = OP_FC;
5738             else
5739                 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5740             PL_bufptr = s + 1;
5741         }
5742         force_next(FUNC);
5743         if (PL_lex_starts) {
5744             s = PL_bufptr;
5745             PL_lex_starts = 0;
5746             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5747             if (PL_lex_casemods == 1 && PL_lex_inpat)
5748                 TOKEN(PERLY_COMMA);
5749             else
5750                 AopNOASSIGN(OP_CONCAT);
5751         }
5752         else
5753             return yylex();
5754     }
5755 }
5756
5757 static int
5758 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5759                         GV **pgv, GV ***pgvp)
5760 {
5761     GV *ogv = NULL;     /* override (winner) */
5762     GV *hgv = NULL;     /* hidden (loser) */
5763     GV *gv = *pgv;
5764
5765     if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5766         CV *cv;
5767         if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5768                                     (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5769                                     SVt_PVCV))
5770             && (cv = GvCVu(gv)))
5771         {
5772             if (GvIMPORTED_CV(gv))
5773                 ogv = gv;
5774             else if (! CvNOWARN_AMBIGUOUS(cv))
5775                 hgv = gv;
5776         }
5777         if (!ogv
5778             && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5779             && (gv = **pgvp)
5780             && (isGV_with_GP(gv)
5781                 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5782                 :   SvPCS_IMPORTED(gv)
5783                 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5784                                                          len, 0), 1)))
5785         {
5786             ogv = gv;
5787         }
5788     }
5789
5790     *pgv = gv;
5791
5792     if (ogv) {
5793         *orig_keyword = key;
5794         return 0;               /* overridden by import or by GLOBAL */
5795     }
5796     else if (gv && !*pgvp
5797              && -key==KEY_lock  /* XXX generalizable kludge */
5798              && GvCVu(gv))
5799     {
5800         return 0;               /* any sub overrides "weak" keyword */
5801     }
5802     else {                      /* no override */
5803         key = -key;
5804         if (key == KEY_dump) {
5805             Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5806         }
5807         *pgv = NULL;
5808         *pgvp = 0;
5809         if (hgv && key != KEY_x)        /* never ambiguous */
5810             Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5811                            "Ambiguous call resolved as CORE::%s(), "
5812                            "qualify as such or use &",
5813                            GvENAME(hgv));
5814         return key;
5815     }
5816 }
5817
5818 static int
5819 yyl_qw(pTHX_ char *s, STRLEN len)
5820 {
5821     OP *words = NULL;
5822
5823     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5824     if (!s)
5825         missingterm(NULL, 0);
5826
5827     COPLINE_SET_FROM_MULTI_END;
5828     PL_expect = XOPERATOR;
5829     if (SvCUR(PL_lex_stuff)) {
5830         int warned_comma = !ckWARN(WARN_QW);
5831         int warned_comment = warned_comma;
5832         char *d = SvPV_force(PL_lex_stuff, len);
5833         while (len) {
5834             for (; isSPACE(*d) && len; --len, ++d)
5835                 /**/;
5836             if (len) {
5837                 SV *sv;
5838                 const char *b = d;
5839                 if (!warned_comma || !warned_comment) {
5840                     for (; !isSPACE(*d) && len; --len, ++d) {
5841                         if (!warned_comma && *d == ',') {
5842                             Perl_warner(aTHX_ packWARN(WARN_QW),
5843                                 "Possible attempt to separate words with commas");
5844                             ++warned_comma;
5845                         }
5846                         else if (!warned_comment && *d == '#') {
5847                             Perl_warner(aTHX_ packWARN(WARN_QW),
5848                                 "Possible attempt to put comments in qw() list");
5849                             ++warned_comment;
5850                         }
5851                     }
5852                 }
5853                 else {
5854                     for (; !isSPACE(*d) && len; --len, ++d)
5855                         /**/;
5856                 }
5857                 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5858                 words = op_append_elem(OP_LIST, words,
5859                                        newSVOP(OP_CONST, 0, tokeq(sv)));
5860             }
5861         }
5862     }
5863     if (!words)
5864         words = newNULLLIST();
5865     SvREFCNT_dec_NN(PL_lex_stuff);
5866     PL_lex_stuff = NULL;
5867     PL_expect = XOPERATOR;
5868     pl_yylval.opval = sawparens(words);
5869     TOKEN(QWLIST);
5870 }
5871
5872 static int
5873 yyl_hyphen(pTHX_ char *s)
5874 {
5875     if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5876         I32 ftst = 0;
5877         char tmp;
5878
5879         s++;
5880         PL_bufptr = s;
5881         tmp = *s++;
5882
5883         while (s < PL_bufend && SPACE_OR_TAB(*s))
5884             s++;
5885
5886         if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5887             s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5888             DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5889             OPERATOR(PERLY_MINUS);              /* unary minus */
5890         }
5891         switch (tmp) {
5892         case 'r': ftst = OP_FTEREAD;    break;
5893         case 'w': ftst = OP_FTEWRITE;   break;
5894         case 'x': ftst = OP_FTEEXEC;    break;
5895         case 'o': ftst = OP_FTEOWNED;   break;
5896         case 'R': ftst = OP_FTRREAD;    break;
5897         case 'W': ftst = OP_FTRWRITE;   break;
5898         case 'X': ftst = OP_FTREXEC;    break;
5899         case 'O': ftst = OP_FTROWNED;   break;
5900         case 'e': ftst = OP_FTIS;       break;
5901         case 'z': ftst = OP_FTZERO;     break;
5902         case 's': ftst = OP_FTSIZE;     break;
5903         case 'f': ftst = OP_FTFILE;     break;
5904         case 'd': ftst = OP_FTDIR;      break;
5905         case 'l': ftst = OP_FTLINK;     break;
5906         case 'p': ftst = OP_FTPIPE;     break;
5907         case 'S': ftst = OP_FTSOCK;     break;
5908         case 'u': ftst = OP_FTSUID;     break;
5909         case 'g': ftst = OP_FTSGID;     break;
5910         case 'k': ftst = OP_FTSVTX;     break;
5911         case 'b': ftst = OP_FTBLK;      break;
5912         case 'c': ftst = OP_FTCHR;      break;
5913         case 't': ftst = OP_FTTTY;      break;
5914         case 'T': ftst = OP_FTTEXT;     break;
5915         case 'B': ftst = OP_FTBINARY;   break;
5916         case 'M': case 'A': case 'C':
5917             gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5918             switch (tmp) {
5919             case 'M': ftst = OP_FTMTIME; break;
5920             case 'A': ftst = OP_FTATIME; break;
5921             case 'C': ftst = OP_FTCTIME; break;
5922             default:                     break;
5923             }
5924             break;
5925         default:
5926             break;
5927         }
5928         if (ftst) {
5929             PL_last_uni = PL_oldbufptr;
5930             PL_last_lop_op = (OPCODE)ftst;
5931             DEBUG_T( {
5932                 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5933             } );
5934             FTST(ftst);
5935         }
5936         else {
5937             /* Assume it was a minus followed by a one-letter named
5938              * subroutine call (or a -bareword), then. */
5939             DEBUG_T( {
5940                 PerlIO_printf(Perl_debug_log,
5941                     "### '-%c' looked like a file test but was not\n",
5942                     (int) tmp);
5943             } );
5944             s = --PL_bufptr;
5945         }
5946     }
5947     {
5948         const char tmp = *s++;
5949         if (*s == tmp) {
5950             s++;
5951             if (PL_expect == XOPERATOR)
5952                 TERM(POSTDEC);
5953             else
5954                 OPERATOR(PREDEC);
5955         }
5956         else if (*s == '>') {
5957             s++;
5958             s = skipspace(s);
5959             if (((*s == '$' || *s == '&') && s[1] == '*')
5960               ||(*s == '$' && s[1] == '#' && s[2] == '*')
5961               ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5962               ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5963              )
5964             {
5965                 PL_expect = XPOSTDEREF;
5966                 TOKEN(ARROW);
5967             }
5968             if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5969                 s = force_word(s,METHCALL0,FALSE,TRUE);
5970                 TOKEN(ARROW);
5971             }
5972             else if (*s == '$')
5973                 OPERATOR(ARROW);
5974             else
5975                 TERM(ARROW);
5976         }
5977         if (PL_expect == XOPERATOR) {
5978             if (*s == '='
5979                 && !PL_lex_allbrackets
5980                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5981             {
5982                 s--;
5983                 TOKEN(0);
5984             }
5985             Aop(OP_SUBTRACT);
5986         }
5987         else {
5988             if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5989                 check_uni();
5990             OPERATOR(PERLY_MINUS);              /* unary minus */
5991         }
5992     }
5993 }
5994
5995 static int
5996 yyl_plus(pTHX_ char *s)
5997 {
5998     const char tmp = *s++;
5999     if (*s == tmp) {
6000         s++;
6001         if (PL_expect == XOPERATOR)
6002             TERM(POSTINC);
6003         else
6004             OPERATOR(PREINC);
6005     }
6006     if (PL_expect == XOPERATOR) {
6007         if (*s == '='
6008             && !PL_lex_allbrackets
6009             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6010         {
6011             s--;
6012             TOKEN(0);
6013         }
6014         Aop(OP_ADD);
6015     }
6016     else {
6017         if (isSPACE(*s) || !isSPACE(*PL_bufptr))
6018             check_uni();
6019         OPERATOR(PERLY_PLUS);
6020     }
6021 }
6022
6023 static int
6024 yyl_star(pTHX_ char *s)
6025 {
6026     if (PL_expect == XPOSTDEREF)
6027         POSTDEREF(PERLY_STAR);
6028
6029     if (PL_expect != XOPERATOR) {
6030         s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
6031         PL_expect = XOPERATOR;
6032         force_ident(PL_tokenbuf, PERLY_STAR);
6033         if (!*PL_tokenbuf)
6034             PREREF(PERLY_STAR);
6035         TERM(PERLY_STAR);
6036     }
6037
6038     s++;
6039     if (*s == '*') {
6040         s++;
6041         if (*s == '=' && !PL_lex_allbrackets
6042             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6043         {
6044             s -= 2;
6045             TOKEN(0);
6046         }
6047         PWop(OP_POW);
6048     }
6049
6050     if (*s == '='
6051         && !PL_lex_allbrackets
6052         && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6053     {
6054         s--;
6055         TOKEN(0);
6056     }
6057
6058     Mop(OP_MULTIPLY);
6059 }
6060
6061 static int
6062 yyl_percent(pTHX_ char *s)
6063 {
6064     if (PL_expect == XOPERATOR) {
6065         if (s[1] == '='
6066             && !PL_lex_allbrackets
6067             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6068         {
6069             TOKEN(0);
6070         }
6071         ++s;
6072         Mop(OP_MODULO);
6073     }
6074     else if (PL_expect == XPOSTDEREF)
6075         POSTDEREF(PERLY_PERCENT_SIGN);
6076
6077     PL_tokenbuf[0] = '%';
6078     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6079     pl_yylval.ival = 0;
6080     if (!PL_tokenbuf[1]) {
6081         PREREF(PERLY_PERCENT_SIGN);
6082     }
6083     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6084         && intuit_more(s, PL_bufend)) {
6085         if (*s == '[')
6086             PL_tokenbuf[0] = '@';
6087     }
6088     PL_expect = XOPERATOR;
6089     force_ident_maybe_lex('%');
6090     TERM(PERLY_PERCENT_SIGN);
6091 }
6092
6093 static int
6094 yyl_caret(pTHX_ char *s)
6095 {
6096     char *d = s;
6097     const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
6098     if (bof && s[1] == '.')
6099         s++;
6100     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6101             (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
6102     {
6103         s = d;
6104         TOKEN(0);
6105     }
6106     s++;
6107     BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
6108 }
6109
6110 static int
6111 yyl_colon(pTHX_ char *s)
6112 {
6113     OP *attrs;
6114
6115     switch (PL_expect) {
6116     case XOPERATOR:
6117         if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
6118             break;
6119         PL_bufptr = s;  /* update in case we back off */
6120         if (*s == '=') {
6121             Perl_croak(aTHX_
6122                        "Use of := for an empty attribute list is not allowed");
6123         }
6124         goto grabattrs;
6125     case XATTRBLOCK:
6126         PL_expect = XBLOCK;
6127         goto grabattrs;
6128     case XATTRTERM:
6129         PL_expect = XTERMBLOCK;
6130      grabattrs:
6131         /* NB: as well as parsing normal attributes, we also end up
6132          * here if there is something looking like attributes
6133          * following a signature (which is illegal, but used to be
6134          * legal in 5.20..5.26). If the latter, we still parse the
6135          * attributes so that error messages(s) are less confusing,
6136          * but ignore them (parser->sig_seen).
6137          */
6138         s = skipspace(s);
6139         attrs = NULL;
6140         while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6141             I32 tmp;
6142             SV *sv;
6143             STRLEN len;
6144             char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
6145             if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
6146                 if (tmp < 0) tmp = -tmp;
6147                 switch (tmp) {
6148                 case KEY_or:
6149                 case KEY_and:
6150                 case KEY_for:
6151                 case KEY_foreach:
6152                 case KEY_unless:
6153                 case KEY_if:
6154                 case KEY_while:
6155                 case KEY_until:
6156                     goto got_attrs;
6157                 default:
6158                     break;
6159                 }
6160             }
6161             sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
6162             if (*d == '(') {
6163                 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
6164                 if (!d) {
6165                     op_free(attrs);
6166                     ASSUME(sv && SvREFCNT(sv) == 1);
6167                     SvREFCNT_dec(sv);
6168                     Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
6169                 }
6170                 COPLINE_SET_FROM_MULTI_END;
6171             }
6172             if (PL_lex_stuff) {
6173                 sv_catsv(sv, PL_lex_stuff);
6174                 attrs = op_append_elem(OP_LIST, attrs,
6175                                     newSVOP(OP_CONST, 0, sv));
6176                 SvREFCNT_dec_NN(PL_lex_stuff);
6177                 PL_lex_stuff = NULL;
6178             }
6179             else {
6180                 attrs = op_append_elem(OP_LIST, attrs,
6181                                     newSVOP(OP_CONST, 0, sv));
6182             }
6183             s = skipspace(d);
6184             if (*s == ':' && s[1] != ':')
6185                 s = skipspace(s+1);
6186             else if (s == d)
6187                 break;  /* require real whitespace or :'s */
6188             /* XXX losing whitespace on sequential attributes here */
6189         }
6190
6191         if (*s != ';'
6192             && *s != '}'
6193             && !(PL_expect == XOPERATOR
6194                    /* if an operator is expected, permit =, //= and ||= or ) to end */
6195                  ? (*s == '=' || *s == ')' || *s == '/' || *s == '|')
6196                  : (*s == '{' || *s == '(')))
6197         {
6198             const char q = ((*s == '\'') ? '"' : '\'');
6199             /* If here for an expression, and parsed no attrs, back off. */
6200             if (PL_expect == XOPERATOR && !attrs) {
6201                 s = PL_bufptr;
6202                 break;
6203             }
6204             /* MUST advance bufptr here to avoid bogus "at end of line"
6205                context messages from yyerror().
6206             */
6207             PL_bufptr = s;
6208             yyerror( (const char *)
6209                      (*s
6210                       ? Perl_form(aTHX_ "Invalid separator character "
6211                                   "%c%c%c in attribute list", q, *s, q)
6212                       : "Unterminated attribute list" ) );
6213             op_free(attrs);
6214             OPERATOR(PERLY_COLON);
6215         }
6216
6217     got_attrs:
6218         if (PL_parser->sig_seen) {
6219             /* see comment about about sig_seen and parser error
6220              * handling */
6221             op_free(attrs);
6222             Perl_croak(aTHX_ "Subroutine attributes must come "
6223                              "before the signature");
6224         }
6225         if (attrs) {
6226             NEXTVAL_NEXTTOKE.opval = attrs;
6227             force_next(THING);
6228         }
6229         TOKEN(COLONATTR);
6230     }
6231
6232     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6233         s--;
6234         TOKEN(0);
6235     }
6236
6237     PL_lex_allbrackets--;
6238     OPERATOR(PERLY_COLON);
6239 }
6240
6241 static int
6242 yyl_subproto(pTHX_ char *s, CV *cv)
6243 {
6244     STRLEN protolen = CvPROTOLEN(cv);
6245     const char *proto = CvPROTO(cv);
6246     bool optional;
6247
6248     proto = S_strip_spaces(aTHX_ proto, &protolen);
6249     if (!protolen)
6250         TERM(FUNC0SUB);
6251     if ((optional = *proto == ';')) {
6252         do {
6253             proto++;
6254         } while (*proto == ';');
6255     }
6256
6257     if (
6258         (
6259             (
6260                 *proto == '$' || *proto == '_'
6261              || *proto == '*' || *proto == '+'
6262             )
6263          && proto[1] == '\0'
6264         )
6265      || (
6266          *proto == '\\' && proto[1] && proto[2] == '\0'
6267         )
6268     ) {
6269         UNIPROTO(UNIOPSUB,optional);
6270     }
6271
6272     if (*proto == '\\' && proto[1] == '[') {
6273         const char *p = proto + 2;
6274         while(*p && *p != ']')
6275             ++p;
6276         if(*p == ']' && !p[1])
6277             UNIPROTO(UNIOPSUB,optional);
6278     }
6279
6280     if (*proto == '&' && *s == '{') {
6281         if (PL_curstash)
6282             sv_setpvs(PL_subname, "__ANON__");
6283         else
6284             sv_setpvs(PL_subname, "__ANON__::__ANON__");
6285         if (!PL_lex_allbrackets
6286             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6287         {
6288             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6289         }
6290         PREBLOCK(LSTOPSUB);
6291     }
6292
6293     return KEY_NULL;
6294 }
6295
6296 static int
6297 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6298 {
6299     char *d;
6300     if (PL_lex_brackets > 100) {
6301         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6302     }
6303
6304     switch (PL_expect) {
6305     case XTERM:
6306     case XTERMORDORDOR:
6307         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6308         PL_lex_allbrackets++;
6309         OPERATOR(HASHBRACK);
6310     case XOPERATOR:
6311         while (s < PL_bufend && SPACE_OR_TAB(*s))
6312             s++;
6313         d = s;
6314         PL_tokenbuf[0] = '\0';
6315         if (d < PL_bufend && *d == '-') {
6316             PL_tokenbuf[0] = '-';
6317             d++;
6318             while (d < PL_bufend && SPACE_OR_TAB(*d))
6319                 d++;
6320         }
6321         if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6322             STRLEN len;
6323             d = scan_word6(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6324                           FALSE, &len, FALSE);
6325             while (d < PL_bufend && SPACE_OR_TAB(*d))
6326                 d++;
6327             if (*d == '}') {
6328                 const char minus = (PL_tokenbuf[0] == '-');
6329                 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6330                 if (minus)
6331                     force_next(PERLY_MINUS);
6332             }
6333         }
6334         /* FALLTHROUGH */
6335     case XATTRTERM:
6336     case XTERMBLOCK:
6337         PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6338         PL_lex_allbrackets++;
6339         PL_expect = XSTATE;
6340         break;
6341     case XATTRBLOCK:
6342     case XBLOCK:
6343         PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6344         PL_lex_allbrackets++;
6345         PL_expect = XSTATE;
6346         break;
6347     case XBLOCKTERM:
6348         PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6349         PL_lex_allbrackets++;
6350         PL_expect = XSTATE;
6351         break;
6352     default: {
6353             const char *t;
6354             if (PL_oldoldbufptr == PL_last_lop)
6355                 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6356             else
6357                 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6358             PL_lex_allbrackets++;
6359             s = skipspace(s);
6360             if (*s == '}') {
6361                 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6362                     PL_expect = XTERM;
6363                     /* This hack is to get the ${} in the message. */
6364                     PL_bufptr = s+1;
6365                     yyerror("syntax error");
6366                     yyquit();
6367                     break;
6368                 }
6369                 OPERATOR(HASHBRACK);
6370             }
6371             if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6372                 /* ${...} or @{...} etc., but not print {...}
6373                  * Skip the disambiguation and treat this as a block.
6374                  */
6375                 goto block_expectation;
6376             }
6377             /* This hack serves to disambiguate a pair of curlies
6378              * as being a block or an anon hash.  Normally, expectation
6379              * determines that, but in cases where we're not in a
6380              * position to expect anything in particular (like inside
6381              * eval"") we have to resolve the ambiguity.  This code
6382              * covers the case where the first term in the curlies is a
6383              * quoted string.  Most other cases need to be explicitly
6384              * disambiguated by prepending a "+" before the opening
6385              * curly in order to force resolution as an anon hash.
6386              *
6387              * XXX should probably propagate the outer expectation
6388              * into eval"" to rely less on this hack, but that could
6389              * potentially break current behavior of eval"".
6390              * GSAR 97-07-21
6391              */
6392             t = s;
6393             if (*s == '\'' || *s == '"' || *s == '`') {
6394                 /* common case: get past first string, handling escapes */
6395                 for (t++; t < PL_bufend && *t != *s;)
6396                     if (*t++ == '\\')
6397                         t++;
6398                 t++;
6399             }
6400             else if (*s == 'q') {
6401                 if (++t < PL_bufend
6402                     && (!isWORDCHAR(*t)
6403                         || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6404                             && !isWORDCHAR(*t))))
6405                 {
6406                     /* skip q//-like construct */
6407                     const char *tmps;
6408                     char open, close, term;
6409                     I32 brackets = 1;
6410
6411                     while (t < PL_bufend && isSPACE(*t))
6412                         t++;
6413                     /* check for q => */
6414                     if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6415                         OPERATOR(HASHBRACK);
6416                     }
6417                     term = *t;
6418                     open = term;
6419                     if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6420                         term = tmps[5];
6421                     close = term;
6422                     if (open == close)
6423                         for (t++; t < PL_bufend; t++) {
6424                             if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6425                                 t++;
6426                             else if (*t == open)
6427                                 break;
6428                         }
6429                     else {
6430                         for (t++; t < PL_bufend; t++) {
6431                             if (*t == '\\' && t+1 < PL_bufend)
6432                                 t++;
6433                             else if (*t == close && --brackets <= 0)
6434                                 break;
6435                             else if (*t == open)
6436                                 brackets++;
6437                         }
6438                     }
6439                     t++;
6440                 }
6441                 else
6442                     /* skip plain q word */
6443                     while (   t < PL_bufend
6444                            && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6445                     {
6446                         t += UTF ? UTF8SKIP(t) : 1;
6447                     }
6448             }
6449             else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6450                 t += UTF ? UTF8SKIP(t) : 1;
6451                 while (   t < PL_bufend
6452                        && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6453                 {
6454                     t += UTF ? UTF8SKIP(t) : 1;
6455                 }
6456             }
6457             while (t < PL_bufend && isSPACE(*t))
6458                 t++;
6459             /* if comma follows first term, call it an anon hash */
6460             /* XXX it could be a comma expression with loop modifiers */
6461             if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6462                                || (*t == '=' && t[1] == '>')))
6463                 OPERATOR(HASHBRACK);
6464             if (PL_expect == XREF) {
6465               block_expectation:
6466                 /* If there is an opening brace or 'sub:', treat it
6467                    as a term to make ${{...}}{k} and &{sub:attr...}
6468                    dwim.  Otherwise, treat it as a statement, so
6469                    map {no strict; ...} works.
6470                  */
6471                 s = skipspace(s);
6472                 if (*s == '{') {
6473                     PL_expect = XTERM;
6474                     break;
6475                 }
6476                 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6477                     PL_bufptr = s;
6478                     d = s + 3;
6479                     d = skipspace(d);
6480                     s = PL_bufptr;
6481                     if (*d == ':') {
6482                         PL_expect = XTERM;
6483                         break;
6484                     }
6485                 }
6486                 PL_expect = XSTATE;
6487             }
6488             else {
6489                 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6490                 PL_expect = XSTATE;
6491             }
6492         }
6493         break;
6494     }
6495
6496     pl_yylval.ival = CopLINE(PL_curcop);
6497     PL_copline = NOLINE;   /* invalidate current command line number */
6498     TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6499 }
6500
6501 static int
6502 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6503 {
6504     assert(s != PL_bufend);
6505     s++;
6506
6507     if (PL_lex_brackets <= 0)
6508         /* diag_listed_as: Unmatched right %s bracket */
6509         yyerror("Unmatched right curly bracket");
6510     else
6511         PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6512
6513     PL_lex_allbrackets--;
6514
6515     if (PL_lex_state == LEX_INTERPNORMAL) {
6516         if (PL_lex_brackets == 0) {
6517             if (PL_expect & XFAKEBRACK) {
6518                 PL_expect &= XENUMMASK;
6519                 PL_lex_state = LEX_INTERPEND;
6520                 PL_bufptr = s;
6521                 return yylex(); /* ignore fake brackets */
6522             }
6523             if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6524              && SvEVALED(PL_lex_repl))
6525                 PL_lex_state = LEX_INTERPEND;
6526             else if (*s == '-' && s[1] == '>')
6527                 PL_lex_state = LEX_INTERPENDMAYBE;
6528             else if (*s != '[' && *s != '{')
6529                 PL_lex_state = LEX_INTERPEND;
6530         }
6531     }
6532
6533     if (PL_expect & XFAKEBRACK) {
6534         PL_expect &= XENUMMASK;
6535         PL_bufptr = s;
6536         return yylex();         /* ignore fake brackets */
6537     }
6538
6539     force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6540     if (formbrack) LEAVE_with_name("lex_format");
6541     if (formbrack == 2) { /* means . where arguments were expected */
6542         force_next(PERLY_SEMICOLON);
6543         TOKEN(FORMRBRACK);
6544     }
6545
6546     TOKEN(PERLY_SEMICOLON);
6547 }
6548
6549 static int
6550 yyl_ampersand(pTHX_ char *s)
6551 {
6552     if (PL_expect == XPOSTDEREF)
6553         POSTDEREF(PERLY_AMPERSAND);
6554
6555     s++;
6556     if (*s++ == '&') {
6557         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6558                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6559             s -= 2;
6560             TOKEN(0);
6561         }
6562         AOPERATOR(ANDAND);
6563     }
6564     s--;
6565
6566     if (PL_expect == XOPERATOR) {
6567         char *d;
6568         bool bof;
6569         if (   PL_bufptr == PL_linestart
6570             && ckWARN(WARN_SEMICOLON)
6571             && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6572         {
6573             CopLINE_dec(PL_curcop);
6574             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6575             CopLINE_inc(PL_curcop);
6576         }
6577         d = s;
6578         if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6579             s++;
6580         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6581                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6582             s = d;
6583             s--;
6584             TOKEN(0);
6585         }
6586         if (d == s)
6587             BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6588         else
6589             BAop(OP_SBIT_AND);
6590     }
6591
6592     PL_tokenbuf[0] = '&';
6593     s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6594     pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6595
6596     if (PL_tokenbuf[1])
6597         force_ident_maybe_lex('&');
6598     else
6599         PREREF(PERLY_AMPERSAND);
6600
6601     TERM(PERLY_AMPERSAND);
6602 }
6603
6604 static int
6605 yyl_verticalbar(pTHX_ char *s)
6606 {
6607     char *d;
6608     bool bof;
6609
6610     s++;
6611     if (*s++ == '|') {
6612         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6613                 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6614             s -= 2;
6615             TOKEN(0);
6616         }
6617         AOPERATOR(OROR);
6618     }
6619
6620     s--;
6621     d = s;
6622     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6623         s++;
6624
6625     if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6626             (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6627         s = d - 1;
6628         TOKEN(0);
6629     }
6630
6631     BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6632 }
6633
6634 static int
6635 yyl_bang(pTHX_ char *s)
6636 {
6637     const char tmp = *s++;
6638     if (tmp == '=') {
6639         /* was this !=~ where !~ was meant?
6640          * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6641
6642         if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6643             const char *t = s+1;
6644
6645             while (t < PL_bufend && isSPACE(*t))
6646                 ++t;
6647
6648             if (*t == '/' || *t == '?'
6649                 || ((*t == 'm' || *t == 's' || *t == 'y')
6650                     && !isWORDCHAR(t[1]))
6651                 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6652                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6653                             "!=~ should be !~");
6654         }
6655
6656         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6657             s -= 2;
6658             TOKEN(0);
6659         }
6660
6661         ChEop(OP_NE);
6662     }
6663
6664     if (tmp == '~')
6665         PMop(OP_NOT);
6666
6667     s--;
6668     OPERATOR(PERLY_EXCLAMATION_MARK);
6669 }
6670
6671 static int
6672 yyl_snail(pTHX_ char *s)
6673 {
6674     if (PL_expect == XPOSTDEREF)
6675         POSTDEREF(PERLY_SNAIL);
6676     PL_tokenbuf[0] = '@';
6677     s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6678     if (PL_expect == XOPERATOR) {
6679         char *d = s;
6680         if (PL_bufptr > s) {
6681             d = PL_bufptr-1;
6682             PL_bufptr = PL_oldbufptr;
6683         }
6684         no_op("Array", d);
6685     }
6686     pl_yylval.ival = 0;
6687     if (!PL_tokenbuf[1]) {
6688         PREREF(PERLY_SNAIL);
6689     }
6690     if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6691         s = skipspace(s);
6692     if (   (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6693         && intuit_more(s, PL_bufend))
6694     {
6695         if (*s == '{')
6696             PL_tokenbuf[0] = '%';
6697
6698         /* Warn about @ where they meant $. */
6699         if (*s == '[' || *s == '{') {
6700             if (ckWARN(WARN_SYNTAX)) {
6701                 S_check_scalar_slice(aTHX_ s);
6702             }
6703         }
6704     }
6705     PL_expect = XOPERATOR;
6706     force_ident_maybe_lex('@');
6707     TERM(PERLY_SNAIL);
6708 }
6709
6710 static int
6711 yyl_slash(pTHX_ char *s)
6712 {
6713     if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6714         if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6715                 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6716             TOKEN(0);
6717         s += 2;
6718         AOPERATOR(DORDOR);
6719     }
6720     else if (PL_expect == XOPERATOR) {
6721         s++;
6722         if (*s == '=' && !PL_lex_allbrackets
6723             && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6724         {
6725             s--;
6726             TOKEN(0);
6727         }
6728         Mop(OP_DIVIDE);
6729     }
6730     else {
6731         /* Disable warning on "study /blah/" */
6732         if (    PL_oldoldbufptr == PL_last_uni
6733             && (   *PL_last_uni != 's' || s - PL_last_uni < 5
6734                 || memNE(PL_last_uni, "study", 5)
6735                 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6736          ))
6737             check_uni();
6738         s = scan_pat(s,OP_MATCH);
6739         TERM(sublex_start());
6740     }
6741 }
6742
6743 static int
6744 yyl_leftsquare(pTHX_ char *s)
6745 {
6746     if (PL_lex_brackets > 100)
6747         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6748     PL_lex_brackstack[PL_lex_brackets++] = 0;
6749     PL_lex_allbrackets++;
6750     s++;
6751     OPERATOR(PERLY_BRACKET_OPEN);
6752 }
6753
6754 static int
6755 yyl_rightsquare(pTHX_ char *s)
6756 {
6757     if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6758         TOKEN(0);
6759     s++;
6760     if (PL_lex_brackets <= 0)
6761         /* diag_listed_as: Unmatched right %s bracket */
6762         yyerror("Unmatched right square bracket");
6763     else
6764         --PL_lex_brackets;
6765     PL_lex_allbrackets--;
6766     if (PL_lex_state == LEX_INTERPNORMAL) {
6767         if (PL_lex_brackets == 0) {
6768             if (*s == '-' && s[1] == '>')
6769                 PL_lex_state = LEX_INTERPENDMAYBE;
6770             else if (*s != '[' && *s != '{')
6771                 PL_lex_state = LEX_INTERPEND;
6772         }
6773     }
6774     TERM(PERLY_BRACKET_CLOSE);
6775 }
6776
6777 static int
6778 yyl_tilde(pTHX_ char *s)
6779 {
6780     bool bof;
6781     if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6782         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6783             TOKEN(0);
6784         s += 2;
6785         Perl_ck_warner_d(aTHX_
6786             packWARN(WARN_DEPRECATED__SMARTMATCH),
6787             "Smartmatch is deprecated");
6788         NCEop(OP_SMARTMATCH);
6789     }
6790     s++;
6791     if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6792         s++;
6793         BCop(OP_SCOMPLEMENT);
6794     }
6795     BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6796 }
6797
6798 static int
6799 yyl_leftparen(pTHX_ char *s)
6800 {
6801     if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6802         PL_oldbufptr = PL_oldoldbufptr;         /* allow print(STDOUT 123) */
6803     else
6804         PL_expect = XTERM;
6805     s = skipspace(s);
6806     PL_lex_allbrackets++;
6807     TOKEN(PERLY_PAREN_OPEN);
6808 }
6809
6810 static int
6811 yyl_rightparen(pTHX_ char *s)
6812 {
6813     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6814         TOKEN(0);
6815     s++;
6816     PL_lex_allbrackets--;
6817     s = skipspace(s);
6818     if (*s == '{')
6819         PREBLOCK(PERLY_PAREN_CLOSE);
6820     TERM(PERLY_PAREN_CLOSE);
6821 }
6822
6823 static int
6824 yyl_leftpointy(pTHX_ char *s)
6825 {
6826     char tmp;
6827
6828     if (PL_expect != XOPERATOR) {
6829         if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6830             check_uni();
6831         if (s[1] == '<' && s[2] != '>')
6832             s = scan_heredoc(s);
6833         else
6834             s = scan_inputsymbol(s);
6835         PL_expect = XOPERATOR;
6836         TOKEN(sublex_start());
6837     }
6838
6839     s++;
6840
6841     tmp = *s++;
6842     if (tmp == '<') {
6843         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6844             s -= 2;
6845             TOKEN(0);
6846         }
6847         SHop(OP_LEFT_SHIFT);
6848     }
6849     if (tmp == '=') {
6850         tmp = *s++;
6851         if (tmp == '>') {
6852             if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6853                 s -= 3;
6854                 TOKEN(0);
6855             }
6856             NCEop(OP_NCMP);
6857         }
6858         s--;
6859         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6860             s -= 2;
6861             TOKEN(0);
6862         }
6863         ChRop(OP_LE);
6864     }
6865
6866     s--;
6867     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6868         s--;
6869         TOKEN(0);
6870     }
6871
6872     ChRop(OP_LT);
6873 }
6874
6875 static int
6876 yyl_rightpointy(pTHX_ char *s)
6877 {
6878     const char tmp = *s++;
6879
6880     if (tmp == '>') {
6881         if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6882             s -= 2;
6883             TOKEN(0);
6884         }
6885         SHop(OP_RIGHT_SHIFT);
6886     }
6887     else if (tmp == '=') {
6888         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6889             s -= 2;
6890             TOKEN(0);
6891         }
6892         ChRop(OP_GE);
6893     }
6894
6895     s--;
6896     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6897         s--;
6898         TOKEN(0);
6899     }
6900
6901     ChRop(OP_GT);
6902 }
6903
6904 static int
6905 yyl_sglquote(pTHX_ char *s)
6906 {
6907     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6908     if (!s)
6909         missingterm(NULL, 0);
6910     COPLINE_SET_FROM_MULTI_END;
6911     DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6912     if (PL_expect == XOPERATOR) {
6913         no_op("String",s);
6914     }
6915     pl_yylval.ival = OP_CONST;
6916     TERM(sublex_start());
6917 }
6918
6919 static int
6920 yyl_dblquote(pTHX_ char *s)
6921 {
6922     char *d;
6923     STRLEN len;
6924     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6925     DEBUG_T( {
6926         if (s)
6927             printbuf("### Saw string before %s\n", s);
6928         else
6929             PerlIO_printf(Perl_debug_log,
6930                          "### Saw unterminated string\n");
6931     } );
6932     if (PL_expect == XOPERATOR) {
6933             no_op("String",s);
6934     }
6935     if (!s)
6936         missingterm(NULL, 0);
6937     pl_yylval.ival = OP_CONST;
6938     /* FIXME. I think that this can be const if char *d is replaced by
6939        more localised variables.  */
6940     for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6941         if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6942             pl_yylval.ival = OP_STRINGIFY;
6943             break;
6944         }
6945     }
6946     if (pl_yylval.ival == OP_CONST)
6947         COPLINE_SET_FROM_MULTI_END;
6948     TERM(sublex_start());
6949 }
6950
6951 static int
6952 yyl_backtick(pTHX_ char *s)
6953 {
6954     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6955     DEBUG_T( {
6956         if (s)
6957             printbuf("### Saw backtick string before %s\n", s);
6958         else
6959             PerlIO_printf(Perl_debug_log,
6960                          "### Saw unterminated backtick string\n");
6961     } );
6962     if (PL_expect == XOPERATOR)
6963         no_op("Backticks",s);
6964     if (!s)
6965         missingterm(NULL, 0);
6966     pl_yylval.ival = OP_BACKTICK;
6967     TERM(sublex_start());
6968 }
6969
6970 static int
6971 yyl_backslash(pTHX_ char *s)
6972 {
6973     if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6974         Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6975                        *s, *s);
6976     if (PL_expect == XOPERATOR)
6977         no_op("Backslash",s);
6978     OPERATOR(REFGEN);
6979 }
6980
6981 static void
6982 yyl_data_handle(pTHX)
6983 {
6984     HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6985                             ? PL_curstash
6986                             : PL_defstash;
6987     GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6988
6989     if (!isGV(gv))
6990         gv_init(gv,stash,"DATA",4,0);
6991
6992     GvMULTI_on(gv);
6993     if (!GvIO(gv))
6994         GvIOp(gv) = newIO();
6995     IoIFP(GvIOp(gv)) = PL_rsfp;
6996
6997     /* Mark this internal pseudo-handle as clean */
6998     IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6999     if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7000         IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7001     else
7002         IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7003
7004 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7005     /* if the script was opened in binmode, we need to revert
7006      * it to text mode for compatibility; but only iff it has CRs
7007      * XXX this is a questionable hack at best. */
7008     if (PL_bufend-PL_bufptr > 2
7009         && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7010     {
7011         Off_t loc = 0;
7012         if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7013             loc = PerlIO_tell(PL_rsfp);
7014             (void)PerlIO_seek(PL_rsfp, 0L, 0);
7015         }
7016         if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7017             if (loc > 0)
7018                 PerlIO_seek(PL_rsfp, loc, 0);
7019         }
7020     }
7021 #endif
7022
7023 #ifdef PERLIO_LAYERS
7024     if (!IN_BYTES) {
7025         if (UTF)
7026             PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7027     }
7028 #endif
7029
7030     PL_rsfp = NULL;
7031 }
7032
7033 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
7034     __attribute__noreturn__;
7035
7036 PERL_STATIC_NO_RET void
7037 yyl_croak_unrecognised(pTHX_ char *s)
7038 {
7039     SV *dsv = newSVpvs_flags("", SVs_TEMP);
7040     const char *c;
7041     char *d;
7042     STRLEN len;
7043
7044     if (UTF) {
7045         STRLEN skiplen = UTF8SKIP(s);
7046         STRLEN stravail = PL_bufend - s;
7047         c = sv_uni_display(dsv, newSVpvn_flags(s,
7048                                                skiplen > stravail ? stravail : skiplen,
7049                                                SVs_TEMP | SVf_UTF8),
7050                            10, UNI_DISPLAY_ISPRINT);
7051     }
7052     else {
7053         c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
7054     }
7055
7056     if (s >= PL_linestart) {
7057         d = PL_linestart;
7058     }
7059     else {
7060         /* somehow (probably due to a parse failure), PL_linestart has advanced
7061          * pass PL_bufptr, get a reasonable beginning of line
7062          */
7063         d = s;
7064         while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
7065             --d;
7066     }
7067     len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
7068     if (len > UNRECOGNIZED_PRECEDE_COUNT) {
7069         d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
7070     }
7071
7072     Perl_croak(aTHX_  "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
7073                       UTF8fARG(UTF, (s - d), d),
7074                      (int) len + 1);
7075 }
7076
7077 static int
7078 yyl_require(pTHX_ char *s, I32 orig_keyword)
7079 {
7080     s = skipspace(s);
7081     if (isDIGIT(*s)) {
7082         s = force_version(s, FALSE);
7083     }
7084     else if (*s != 'v' || !isDIGIT(s[1])
7085             || (s = force_version(s, TRUE), *s == 'v'))
7086     {
7087         *PL_tokenbuf = '\0';
7088         s = force_word(s,BAREWORD,TRUE,TRUE);
7089         if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
7090                                    PL_tokenbuf + sizeof(PL_tokenbuf),
7091                                    UTF))
7092         {
7093             gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7094                         GV_ADD | (UTF ? SVf_UTF8 : 0));
7095         }
7096         else if (*s == '<')
7097             yyerror("<> at require-statement should be quotes");
7098     }
7099
7100     if (orig_keyword == KEY_require)
7101         pl_yylval.ival = 1;
7102     else
7103         pl_yylval.ival = 0;
7104
7105     PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
7106     PL_bufptr = s;
7107     PL_last_uni = PL_oldbufptr;
7108     PL_last_lop_op = OP_REQUIRE;
7109     s = skipspace(s);
7110     return REPORT( (int)KW_REQUIRE );
7111 }
7112
7113 static int
7114 yyl_foreach(pTHX_ char *s)
7115 {
7116     if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7117         return REPORT(0);
7118     pl_yylval.ival = CopLINE(PL_curcop);
7119     s = skipspace(s);
7120     if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7121         char *p = s;
7122         SSize_t s_off = s - SvPVX(PL_linestr);
7123         bool paren_is_valid = FALSE;
7124         bool maybe_package = FALSE;
7125         bool saw_core = FALSE;
7126         bool core_valid = FALSE;
7127
7128         if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
7129             saw_core = TRUE;
7130             p += 6;
7131         }
7132         if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
7133             core_valid = TRUE;
7134             paren_is_valid = TRUE;
7135             if (isSPACE(p[2])) {
7136                 p = skipspace(p + 3);
7137                 maybe_package = TRUE;
7138             }
7139             else {
7140                 p += 2;
7141             }
7142         }
7143         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
7144             core_valid = TRUE;
7145             if (isSPACE(p[3])) {
7146                 p = skipspace(p + 4);
7147                 maybe_package = TRUE;
7148             }
7149             else {
7150                 p += 3;
7151             }
7152         }
7153         else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
7154             core_valid = TRUE;
7155             if (isSPACE(p[5])) {
7156                 p = skipspace(p + 6);
7157             }
7158             else {
7159                 p += 5;
7160             }
7161         }
7162         if (saw_core && !core_valid) {
7163             Perl_croak(aTHX_ "Missing $ on loop variable");
7164         }
7165
7166         if (maybe_package && !saw_core) {
7167             /* skip optional package name, as in "for my abc $x (..)" */
7168             if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
7169                 STRLEN len;
7170                 p = scan_word6(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
7171                 p = skipspace(p);
7172                 paren_is_valid = FALSE;
7173             }
7174         }
7175
7176         if (UNLIKELY(paren_is_valid && *p == '(')) {
7177             Perl_ck_warner_d(aTHX_
7178                              packWARN(WARN_EXPERIMENTAL__FOR_LIST),
7179                              "for my (...) is experimental");
7180         }
7181         else if (UNLIKELY(*p != '$' && *p != '\\')) {
7182             /* "for myfoo (" will end up here, but with p pointing at the 'f' */
7183             Perl_croak(aTHX_ "Missing $ on loop variable");
7184         }
7185         /* The buffer may have been reallocated, update s */
7186         s = SvPVX(PL_linestr) + s_off;
7187     }
7188     OPERATOR(KW_FOR);
7189 }
7190
7191 static int
7192 yyl_do(pTHX_ char *s, I32 orig_keyword)
7193 {
7194     s = skipspace(s);
7195     if (*s == '{')
7196         PRETERMBLOCK(KW_DO);
7197     if (*s != '\'') {
7198         char *d;
7199         STRLEN len;
7200         *PL_tokenbuf = '&';
7201         d = scan_word6(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7202                       1, &len, TRUE);
7203         if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7204          && !keyword(PL_tokenbuf + 1, len, 0)) {
7205             SSize_t off = s-SvPVX(PL_linestr);
7206             d = skipspace(d);
7207             s = SvPVX(PL_linestr)+off;
7208             if (*d == '(') {
7209                 force_ident_maybe_lex('&');
7210                 s = d;
7211             }
7212         }
7213     }
7214     if (orig_keyword == KEY_do)
7215         pl_yylval.ival = 1;
7216     else
7217         pl_yylval.ival = 0;
7218     OPERATOR(KW_DO);
7219 }
7220
7221 static int
7222 yyl_my(pTHX_ char *s, I32 my)
7223 {
7224     if (PL_in_my) {
7225         PL_bufptr = s;
7226         yyerror(Perl_form(aTHX_
7227                           "Can't redeclare \"%s\" in \"%s\"",
7228                            my       == KEY_my    ? "my" :
7229                            my       == KEY_state ? "state" : "our",
7230                            PL_in_my == KEY_my    ? "my" :
7231                            PL_in_my == KEY_state ? "state" : "our"));
7232     }
7233     PL_in_my = (U16)my;
7234     s = skipspace(s);
7235     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7236         STRLEN len;
7237         s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len, TRUE);
7238         if (memEQs(PL_tokenbuf, len, "sub"))
7239             return yyl_sub(aTHX_ s, my);
7240         PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7241         if (!PL_in_my_stash) {
7242             char tmpbuf[1024];
7243             int i;
7244             PL_bufptr = s;
7245             i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7246             PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
7247             yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7248         }
7249     }
7250     else if (*s == '\\') {
7251         if (!FEATURE_MYREF_IS_ENABLED)
7252             Perl_croak(aTHX_ "The experimental declared_refs "
7253                              "feature is not enabled");
7254         Perl_ck_warner_d(aTHX_
7255              packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7256             "Declaring references is experimental");
7257     }
7258     OPERATOR(KW_MY);
7259 }
7260
7261 static int yyl_try(pTHX_ char*);
7262
7263 static bool
7264 yyl_eol_needs_semicolon(pTHX_ char **ps)
7265 {
7266     char *s = *ps;
7267     if (PL_lex_state != LEX_NORMAL
7268         || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
7269     {
7270         const bool in_comment = *s == '#';
7271         char *d;
7272         if (*s == '#' && s == PL_linestart && PL_in_eval
7273          && !PL_rsfp && !PL_parser->filtered) {
7274             /* handle eval qq[#line 1 "foo"\n ...] */
7275             CopLINE_dec(PL_curcop);
7276             incline(s, PL_bufend);
7277         }
7278         d = s;
7279         while (d < PL_bufend && *d != '\n')
7280             d++;
7281         if (d < PL_bufend)
7282             d++;
7283         s = d;
7284         if (in_comment && d == PL_bufend
7285             && PL_lex_state == LEX_INTERPNORMAL
7286             && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7287             && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
7288         else
7289             incline(s, PL_bufend);
7290         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7291             PL_lex_state = LEX_FORMLINE;
7292             force_next(FORMRBRACK);
7293             *ps = s;
7294             return TRUE;
7295         }
7296     }
7297     else {
7298         while (s < PL_bufend && *s != '\n')
7299             s++;
7300         if (s < PL_bufend) {
7301             s++;
7302             if (s < PL_bufend)
7303                 incline(s, PL_bufend);
7304         }
7305     }
7306     *ps = s;
7307     return FALSE;
7308 }
7309
7310 static int
7311 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
7312 {
7313     char *d;
7314
7315     goto start;
7316
7317     do {
7318         fake_eof = 0;
7319         bof = cBOOL(PL_rsfp);
7320       start:
7321
7322         PL_bufptr = PL_bufend;
7323         COPLINE_INC_WITH_HERELINES;
7324         if (!lex_next_chunk(fake_eof)) {
7325             CopLINE_dec(PL_curcop);
7326             s = PL_bufptr;
7327             TOKEN(PERLY_SEMICOLON);     /* not infinite loop because rsfp is NULL now */
7328         }
7329         CopLINE_dec(PL_curcop);
7330         s = PL_bufptr;
7331         /* If it looks like the start of a BOM or raw UTF-16,
7332          * check if it in fact is. */
7333         if (bof && PL_rsfp
7334             && (   *s == 0
7335                 || *(U8*)s == BOM_UTF8_FIRST_BYTE
7336                 || *(U8*)s >= 0xFE
7337                 || s[1] == 0))
7338         {
7339             Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7340             bof = (offset == (Off_t)SvCUR(PL_linestr));
7341 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7342             /* offset may include swallowed CR */
7343             if (!bof)
7344                 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7345 #endif
7346             if (bof) {
7347                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7348                 s = swallow_bom((U8*)s);
7349             }
7350         }
7351         if (PL_parser->in_pod) {
7352             /* Incest with pod. */
7353             if (    memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7354                 && !isALPHA(s[4]))
7355             {
7356                 SvPVCLEAR(PL_linestr);
7357                 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7358                 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7359                 PL_last_lop = PL_last_uni = NULL;
7360                 PL_parser->in_pod = 0;
7361             }
7362         }
7363         if (PL_rsfp || PL_parser->filtered)
7364             incline(s, PL_bufend);
7365     } while (PL_parser->in_pod);
7366
7367     PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7368     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7369     PL_last_lop = PL_last_uni = NULL;
7370     if (CopLINE(PL_curcop) == 1) {
7371         while (s < PL_bufend && isSPACE(*s))
7372             s++;
7373         if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7374             s++;
7375         d = NULL;
7376         if (!PL_in_eval) {
7377             if (*s == '#' && *(s+1) == '!')
7378                 d = s + 2;
7379 #ifdef ALTERNATE_SHEBANG
7380             else {
7381                 static char const as[] = ALTERNATE_SHEBANG;
7382                 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7383                     d = s + (sizeof(as) - 1);
7384             }
7385 #endif /* ALTERNATE_SHEBANG */
7386         }
7387         if (d) {
7388             char *ipath;
7389             char *ipathend;
7390
7391             while (isSPACE(*d))
7392                 d++;
7393             ipath = d;
7394             while (*d && !isSPACE(*d))
7395                 d++;
7396             ipathend = d;
7397
7398 #ifdef ARG_ZERO_IS_SCRIPT
7399             if (ipathend > ipath) {
7400                 /*
7401                  * HP-UX (at least) sets argv[0] to the script name,
7402                  * which makes $^X incorrect.  And Digital UNIX and Linux,
7403                  * at least, set argv[0] to the basename of the Perl
7404                  * interpreter. So, having found "#!", we'll set it right.
7405                  */
7406                 SV* copfilesv = CopFILESV(PL_curcop);
7407                 if (copfilesv) {
7408                     SV * const x =
7409                         GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7410                                          SVt_PV)); /* $^X */
7411                     assert(SvPOK(x) || SvGMAGICAL(x));
7412                     if (sv_eq(x, copfilesv)) {
7413                         sv_setpvn(x, ipath, ipathend - ipath);
7414                         SvSETMAGIC(x);
7415                     }
7416                     else {
7417                         STRLEN blen;
7418                         STRLEN llen;
7419                         const char *bstart = SvPV_const(copfilesv, blen);
7420                         const char * const lstart = SvPV_const(x, llen);
7421                         if (llen < blen) {
7422                             bstart += blen - llen;
7423                             if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7424                                 sv_setpvn(x, ipath, ipathend - ipath);
7425                                 SvSETMAGIC(x);
7426                             }
7427                         }
7428                     }
7429                 }
7430                 else {
7431                     /* Anything to do if no copfilesv? */
7432                 }
7433                 TAINT_NOT;      /* $^X is always tainted, but that's OK */
7434             }
7435 #endif /* ARG_ZERO_IS_SCRIPT */
7436
7437             /*
7438              * Look for options.
7439              */
7440             d = instr(s,"perl -");
7441             if (!d) {
7442                 d = instr(s,"perl");
7443 #if defined(DOSISH)
7444                 /* avoid getting into infinite loops when shebang
7445                  * line contains "Perl" rather than "perl" */
7446                 if (!d) {
7447                     for (d = ipathend-4; d >= ipath; --d) {
7448                         if (isALPHA_FOLD_EQ(*d, 'p')
7449                             && !ibcmp(d, "perl", 4))
7450                         {
7451                             break;
7452                         }
7453                     }
7454                     if (d < ipath)
7455                         d = NULL;
7456                 }
7457 #endif
7458             }
7459 #ifdef ALTERNATE_SHEBANG
7460             /*
7461              * If the ALTERNATE_SHEBANG on this system starts with a
7462              * character that can be part of a Perl expression, then if
7463              * we see it but not "perl", we're probably looking at the
7464              * start of Perl code, not a request to hand off to some
7465              * other interpreter.  Similarly, if "perl" is there, but
7466              * not in the first 'word' of the line, we assume the line
7467              * contains the start of the Perl program.
7468              */
7469             if (d && *s != '#') {
7470                 const char *c = ipath;
7471                 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7472                     c++;
7473                 if (c < d)
7474                     d = NULL;   /* "perl" not in first word; ignore */
7475                 else
7476                     *s = '#';   /* Don't try to parse shebang line */
7477             }
7478 #endif /* ALTERNATE_SHEBANG */
7479             if (!d
7480                 && *s == '#'
7481                 && ipathend > ipath
7482                 && !PL_minus_c
7483                 && !instr(s,"indir")
7484                 && instr(PL_origargv[0],"perl"))
7485             {
7486                 char **newargv;
7487
7488                 *ipathend = '\0';
7489                 s = ipathend + 1;
7490                 while (s < PL_bufend && isSPACE(*s))
7491                     s++;
7492                 if (s < PL_bufend) {
7493                     Newx(newargv,PL_origargc+3,char*);
7494                     newargv[1] = s;
7495                     while (s < PL_bufend && !isSPACE(*s))
7496                         s++;
7497                     *s = '\0';
7498                     Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7499                 }
7500                 else
7501                     newargv = PL_origargv;
7502                 newargv[0] = ipath;
7503                 PERL_FPU_PRE_EXEC
7504                 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7505                 PERL_FPU_POST_EXEC
7506                 Perl_croak(aTHX_ "Can't exec %s", ipath);
7507             }
7508             if (d) {
7509                 while (*d && !isSPACE(*d))
7510                     d++;
7511                 while (SPACE_OR_TAB(*d))
7512                     d++;
7513
7514                 if (*d++ == '-') {
7515                     const bool switches_done = PL_doswitches;
7516                     const U32 oldpdb = PL_perldb;
7517                     const bool oldn = PL_minus_n;
7518                     const bool oldp = PL_minus_p;
7519                     const char *d1 = d;
7520
7521                     do {
7522                         bool baduni = FALSE;
7523                         if (*d1 == 'C') {
7524                             const char *d2 = d1 + 1;
7525                             if (parse_unicode_opts((const char **)&d2)
7526                                 != PL_unicode)
7527                                 baduni = TRUE;
7528                         }
7529                         if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7530                             const char * const m = d1;
7531                             while (*d1 && !isSPACE(*d1))
7532                                 d1++;
7533                             Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7534                                   (int)(d1 - m), m);
7535                         }
7536                         d1 = moreswitches(d1);
7537                     } while (d1);
7538                     if (PL_doswitches && !switches_done) {
7539                         int argc = PL_origargc;
7540                         char **argv = PL_origargv;
7541                         do {
7542                             argc--,argv++;
7543                         } while (argc && argv[0][0] == '-' && argv[0][1]);
7544                         init_argv_symbols(argc,argv);
7545                     }
7546                     if (   (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7547                         || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7548                           /* if we have already added "LINE: while (<>) {",
7549                              we must not do it again */
7550                     {
7551                         SvPVCLEAR(PL_linestr);
7552                         PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7553                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7554                         PL_last_lop = PL_last_uni = NULL;
7555                         PL_preambled = FALSE;
7556                         if (PERLDB_LINE_OR_SAVESRC)
7557                             (void)gv_fetchfile(PL_origfilename);
7558                         return YYL_RETRY;
7559                     }
7560                 }
7561             }
7562         }
7563     }
7564
7565     if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7566         PL_lex_state = LEX_FORMLINE;
7567         force_next(FORMRBRACK);
7568         TOKEN(PERLY_SEMICOLON);
7569     }
7570
7571     PL_bufptr = s;
7572     return YYL_RETRY;
7573 }
7574
7575 static int
7576 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7577 {
7578     CLINE;
7579     pl_yylval.opval
7580         = newSVOP(OP_CONST, 0,
7581                        S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7582     pl_yylval.opval->op_private = OPpCONST_BARE;
7583     TERM(BAREWORD);
7584 }
7585
7586 static int
7587 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7588 {
7589     if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7590         && PL_parser->saw_infix_sigil)
7591     {
7592         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7593                          "Operator or semicolon missing before %c%" UTF8f,
7594                          lastchar,
7595                          UTF8fARG(UTF, strlen(PL_tokenbuf),
7596                                   PL_tokenbuf));
7597         Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7598                          "Ambiguous use of %c resolved as operator %c",
7599                          lastchar, lastchar);
7600     }
7601     TOKEN(BAREWORD);
7602 }
7603
7604 static int
7605 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7606 {
7607     if (sv) {
7608         op_free(rv2cv_op);
7609         SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7610         ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7611         if (SvTYPE(sv) == SVt_PVAV)
7612             pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7613                                       pl_yylval.opval);
7614         else {
7615             pl_yylval.opval->op_private = 0;
7616             pl_yylval.opval->op_folded = 1;
7617             pl_yylval.opval->op_flags |= OPf_SPECIAL;
7618         }
7619         TOKEN(BAREWORD);
7620     }
7621
7622     op_free(pl_yylval.opval);
7623     pl_yylval.opval =
7624         off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7625     pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7626     PL_last_lop = PL_oldbufptr;
7627     PL_last_lop_op = OP_ENTERSUB;
7628
7629     /* Is there a prototype? */
7630     if (SvPOK(cv)) {
7631         int k = yyl_subproto(aTHX_ s, cv);
7632         if (k != KEY_NULL)
7633             return k;
7634     }
7635
7636     NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7637     PL_expect = XTERM;
7638     force_next(off ? PRIVATEREF : BAREWORD);
7639     if (!PL_lex_allbrackets
7640         && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7641     {
7642         PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7643     }
7644
7645     TOKEN(NOAMP);
7646 }
7647
7648 /* Honour "reserved word" warnings, and enforce strict subs */
7649 static void
7650 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7651 {
7652     /* after "print" and similar functions (corresponding to
7653      * "F? L" in opcode.pl), whatever wasn't already parsed as
7654      * a filehandle should be subject to "strict subs".
7655      * Likewise for the optional indirect-object argument to system
7656      * or exec, which can't be a bareword */
7657     if ((PL_last_lop_op == OP_PRINT
7658             || PL_last_lop_op == OP_PRTF
7659             || PL_last_lop_op == OP_SAY
7660             || PL_last_lop_op == OP_SYSTEM
7661             || PL_last_lop_op == OP_EXEC)
7662         && (PL_hints & HINT_STRICT_SUBS))
7663     {
7664         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7665     }
7666
7667     if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7668         char *d = PL_tokenbuf;
7669         while (isLOWER(*d))
7670             d++;
7671         if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7672             /* PL_warn_reserved is constant */
7673             GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7674             Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7675                         PL_tokenbuf);
7676             GCC_DIAG_RESTORE_STMT;
7677         }
7678     }
7679 }
7680
7681 static int
7682 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7683 {
7684     int pkgname = 0;
7685     const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7686     bool safebw;
7687     bool no_op_error = FALSE;
7688     /* Use this var to track whether intuit_method has been
7689        called.  intuit_method returns 0 or > 255.  */
7690     int key = 1;
7691
7692     if (PL_expect == XOPERATOR) {
7693         if (PL_bufptr == PL_linestart) {
7694             CopLINE_dec(PL_curcop);
7695             Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7696             CopLINE_inc(PL_curcop);
7697         }
7698         else
7699             /* We want to call no_op with s pointing after the
7700                bareword, so defer it.  But we want it to come
7701                before the Bad name croak.  */
7702             no_op_error = TRUE;
7703     }
7704
7705     /* Get the rest if it looks like a package qualifier */
7706
7707     if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7708         STRLEN morelen;
7709         s = scan_word6(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7710                       TRUE, &morelen, TRUE);
7711         if (no_op_error) {
7712             no_op("Bareword",s);
7713             no_op_error = FALSE;
7714         }
7715         if (!morelen)
7716             Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7717                     UTF8fARG(UTF, len, PL_tokenbuf),
7718                     *s == '\'' ? "'" : "::");
7719         len += morelen;
7720         pkgname = 1;
7721     }
7722
7723     if (no_op_error)
7724         no_op("Bareword",s);
7725
7726     /* See if the name is "Foo::",
7727        in which case Foo is a bareword
7728        (and a package name). */
7729
7730     if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7731         if (ckWARN(WARN_BAREWORD)
7732             && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7733             Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7734                         "Bareword \"%" UTF8f
7735                         "\" refers to nonexistent package",
7736                         UTF8fARG(UTF, len, PL_tokenbuf));
7737         len -= 2;
7738         PL_tokenbuf[len] = '\0';
7739         c.gv = NULL;
7740         c.gvp = 0;
7741         safebw = TRUE;
7742     }
7743     else {
7744         safebw = FALSE;
7745     }
7746
7747     /* if we saw a global override before, get the right name */
7748
7749     if (!c.sv)
7750         c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7751     if (c.gvp) {
7752         SV *sv = newSVpvs("CORE::GLOBAL::");
7753         sv_catsv(sv, c.sv);
7754         SvREFCNT_dec(c.sv);
7755         c.sv = sv;
7756     }
7757
7758     /* Presume this is going to be a bareword of some sort. */
7759     CLINE;
7760     pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7761     pl_yylval.opval->op_private = OPpCONST_BARE;
7762
7763     /* And if "Foo::", then that's what it certainly is. */
7764     if (safebw)
7765         return yyl_safe_bareword(aTHX_ s, lastchar);
7766
7767     if (!c.off) {
7768         OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7769         const_op->op_private = OPpCONST_BARE;
7770         c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7771         c.cv = c.lex
7772             ? isGV(c.gv)
7773                 ? GvCV(c.gv)
7774                 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7775                     ? (CV *)SvRV(c.gv)
7776                     : ((CV *)c.gv)
7777             : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7778     }
7779
7780     /* See if it's the indirect object for a list operator. */
7781
7782     if (PL_oldoldbufptr
7783         && PL_oldoldbufptr < PL_bufptr
7784         && (PL_oldoldbufptr == PL_last_lop
7785             || PL_oldoldbufptr == PL_last_uni)
7786         && /* NO SKIPSPACE BEFORE HERE! */
7787            (PL_expect == XREF
7788             || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7789                                                    == OA_FILEREF))
7790     {
7791         bool immediate_paren = *s == '(';
7792         SSize_t s_off;
7793
7794         /* (Now we can afford to cross potential line boundary.) */
7795         s = skipspace(s);
7796
7797         /* intuit_method() can indirectly call lex_next_chunk(),
7798          * invalidating s
7799          */
7800         s_off = s - SvPVX(PL_linestr);
7801         /* Two barewords in a row may indicate method call. */
7802         if (   (   isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7803                 || *s == '$')
7804             && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7805         {
7806             /* the code at method: doesn't use s */
7807             goto method;
7808         }
7809         s = SvPVX(PL_linestr) + s_off;
7810
7811         /* If not a declared subroutine, it's an indirect object. */
7812         /* (But it's an indir obj regardless for sort.) */
7813         /* Also, if "_" follows a filetest operator, it's a bareword */
7814
7815         if (
7816             ( !immediate_paren && (PL_last_lop_op == OP_SORT
7817              || (!c.cv
7818                  && (PL_last_lop_op != OP_MAPSTART
7819                      && PL_last_lop_op != OP_GREPSTART))))
7820            || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7821                 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7822                                                 == OA_FILESTATOP))
7823            )
7824         {
7825             PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7826             yyl_strictwarn_bareword(aTHX_ lastchar);
7827             op_free(c.rv2cv_op);
7828             return yyl_safe_bareword(aTHX_ s, lastchar);
7829         }
7830     }
7831
7832     PL_expect = XOPERATOR;
7833     s = skipspace(s);
7834
7835     /* Is this a word before a => operator? */
7836     if (*s == '=' && s[1] == '>' && !pkgname) {
7837         op_free(c.rv2cv_op);
7838         CLINE;
7839         if (c.gvp || (c.lex && !c.off)) {
7840             assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7841             /* This is our own scalar, created a few lines
7842                above, so this is safe. */
7843             SvREADONLY_off(c.sv);
7844             sv_setpv(c.sv, PL_tokenbuf);
7845             if (UTF && !IN_BYTES
7846              && is_utf8_string((U8*)PL_tokenbuf, len))
7847                   SvUTF8_on(c.sv);
7848             SvREADONLY_on(c.sv);
7849         }
7850         TERM(BAREWORD);
7851     }
7852
7853     /* If followed by a paren, it's certainly a subroutine. */
7854     if (*s == '(') {
7855         CLINE;
7856         if (c.cv) {
7857             char *d = s + 1;
7858             while (SPACE_OR_TAB(*d))
7859                 d++;
7860             if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7861                 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7862         }
7863         NEXTVAL_NEXTTOKE.opval =
7864             c.off ? c.rv2cv_op : pl_yylval.opval;
7865         if (c.off)
7866              op_free(pl_yylval.opval), force_next(PRIVATEREF);
7867         else op_free(c.rv2cv_op),      force_next(BAREWORD);
7868         pl_yylval.ival = 0;
7869         TOKEN(PERLY_AMPERSAND);
7870     }
7871
7872     /* If followed by var or block, call it a method (unless sub) */
7873
7874     if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7875         op_free(c.rv2cv_op);
7876         PL_last_lop = PL_oldbufptr;
7877         PL_last_lop_op = OP_METHOD;
7878         if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7879             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7880         PL_expect = XBLOCKTERM;
7881         PL_bufptr = s;
7882         return REPORT(METHCALL0);
7883     }
7884
7885     /* If followed by a bareword, see if it looks like indir obj. */
7886
7887     if (   key == 1
7888         && !orig_keyword
7889         && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7890         && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7891     {
7892       method:
7893         if (c.lex && !c.off) {
7894             assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7895             SvREADONLY_off(c.sv);
7896             sv_setpvn(c.sv, PL_tokenbuf, len);
7897             if (UTF && !IN_BYTES
7898              && is_utf8_string((U8*)PL_tokenbuf, len))
7899                 SvUTF8_on(c.sv);
7900             else SvUTF8_off(c.sv);
7901         }
7902         op_free(c.rv2cv_op);
7903         if (key == METHCALL0 && !PL_lex_allbrackets
7904             && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7905         {
7906             PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7907         }
7908         return REPORT(key);
7909     }
7910
7911     /* Not a method, so call it a subroutine (if defined) */
7912
7913     if (c.cv) {
7914         /* Check for a constant sub */
7915         c.sv = cv_const_sv_or_av(c.cv);
7916         return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7917     }
7918
7919     /* Call it a bare word */
7920
7921     if (PL_hints & HINT_STRICT_SUBS)
7922         pl_yylval.opval->op_private |= OPpCONST_STRICT;
7923     else
7924         yyl_strictwarn_bareword(aTHX_ lastchar);
7925
7926     op_free(c.rv2cv_op);
7927
7928     return yyl_safe_bareword(aTHX_ s, lastchar);
7929 }
7930
7931 static int
7932 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7933 {
7934     switch (key) {
7935     default:                    /* not a keyword */
7936         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7937
7938     case KEY___FILE__:
7939         FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7940
7941     case KEY___LINE__:
7942         FUN0OP(
7943             newSVOP(OP_CONST, 0,
7944                 Perl_newSVpvf(aTHX_ "%" LINE_Tf, CopLINE(PL_curcop)))
7945         );
7946
7947     case KEY___PACKAGE__:
7948         FUN0OP(
7949             newSVOP(OP_CONST, 0, (PL_curstash
7950                                      ? newSVhek(HvNAME_HEK(PL_curstash))
7951                                      : &PL_sv_undef))
7952         );
7953
7954     case KEY___DATA__:
7955     case KEY___END__:
7956         if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7957             yyl_data_handle(aTHX);
7958         return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7959
7960     case KEY___SUB__:
7961         /* If !CvCLONE(PL_compcv) then rpeep will probably turn this into an
7962          * OP_CONST. We need to make it big enough to allow room for that if
7963          * so */
7964         FUN0OP(CvCLONE(PL_compcv)
7965                     ? newOP(OP_RUNCV, 0)
7966                     : newSVOP(OP_RUNCV, 0, &PL_sv_undef));
7967
7968     case KEY___CLASS__:
7969         FUN0(OP_CLASSNAME);
7970
7971     case KEY_AUTOLOAD:
7972     case KEY_DESTROY:
7973     case KEY_BEGIN:
7974     case KEY_UNITCHECK:
7975     case KEY_CHECK:
7976     case KEY_INIT:
7977     case KEY_END:
7978         if (PL_expect == XSTATE)
7979             return yyl_sub(aTHX_ PL_bufptr, key);
7980         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7981
7982     case KEY_ADJUST:
7983         Perl_ck_warner_d(aTHX_
7984             packWARN(WARN_EXPERIMENTAL__CLASS), "ADJUST is experimental");
7985
7986         /* The way that KEY_CHECK et.al. are handled currently are nothing
7987          * short of crazy. We won't copy that model for new phasers, but use
7988          * this as an experiment to test if this will work
7989          */
7990         PHASERBLOCK(KEY_ADJUST);
7991
7992     case KEY_abs:
7993         UNI(OP_ABS);
7994
7995     case KEY_alarm:
7996         UNI(OP_ALARM);
7997
7998     case KEY_accept:
7999         LOP(OP_ACCEPT,XTERM);
8000
8001     case KEY_and:
8002         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8003             return REPORT(0);
8004         OPERATOR(ANDOP);
8005
8006     case KEY_atan2:
8007         LOP(OP_ATAN2,XTERM);
8008
8009     case KEY_bind:
8010         LOP(OP_BIND,XTERM);
8011
8012     case KEY_binmode:
8013         LOP(OP_BINMODE,XTERM);
8014
8015     case KEY_bless:
8016         LOP(OP_BLESS,XTERM);
8017
8018     case KEY_break:
8019         FUN0(OP_BREAK);
8020
8021     case KEY_catch:
8022         Perl_ck_warner_d(aTHX_
8023             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
8024         PREBLOCK(KW_CATCH);
8025
8026     case KEY_chop:
8027         UNI(OP_CHOP);
8028
8029     case KEY_class:
8030         Perl_ck_warner_d(aTHX_
8031             packWARN(WARN_EXPERIMENTAL__CLASS), "class is experimental");
8032
8033         s = force_word(s,BAREWORD,FALSE,TRUE);
8034         s = skipspace(s);
8035         s = force_strict_version(s);
8036         PL_expect = XATTRBLOCK;
8037         TOKEN(KW_CLASS);
8038
8039     case KEY_continue:
8040         /* We have to disambiguate the two senses of
8041           "continue". If the next token is a '{' then
8042           treat it as the start of a continue block;
8043           otherwise treat it as a control operator.
8044          */
8045         s = skipspace(s);
8046         if (*s == '{')
8047             PREBLOCK(KW_CONTINUE);
8048         else
8049             FUN0(OP_CONTINUE);
8050
8051     case KEY_chdir:
8052         /* may use HOME */
8053         (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
8054         UNI(OP_CHDIR);
8055
8056     case KEY_close:
8057         UNI(OP_CLOSE);
8058
8059     case KEY_closedir:
8060         UNI(OP_CLOSEDIR);
8061
8062     case KEY_cmp:
8063         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8064             return REPORT(0);
8065         NCEop(OP_SCMP);
8066
8067     case KEY_caller:
8068         UNI(OP_CALLER);
8069
8070     case KEY_crypt:
8071
8072         LOP(OP_CRYPT,XTERM);
8073
8074     case KEY_chmod:
8075         LOP(OP_CHMOD,XTERM);
8076
8077     case KEY_chown:
8078         LOP(OP_CHOWN,XTERM);
8079
8080     case KEY_connect:
8081         LOP(OP_CONNECT,XTERM);
8082
8083     case KEY_chr:
8084         UNI(OP_CHR);
8085
8086     case KEY_cos:
8087         UNI(OP_COS);
8088
8089     case KEY_chroot:
8090         UNI(OP_CHROOT);
8091
8092     case KEY_default:
8093         PREBLOCK(KW_DEFAULT);
8094
8095     case KEY_defer:
8096         Perl_ck_warner_d(aTHX_
8097             packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
8098         PREBLOCK(KW_DEFER);
8099
8100     case KEY_do:
8101         return yyl_do(aTHX_ s, orig_keyword);
8102
8103     case KEY_die:
8104         PL_hints |= HINT_BLOCK_SCOPE;
8105         LOP(OP_DIE,XTERM);
8106
8107     case KEY_defined:
8108         UNI(OP_DEFINED);
8109
8110     case KEY_delete:
8111         UNI(OP_DELETE);
8112
8113     case KEY_dbmopen:
8114         Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
8115                           STR_WITH_LEN("NDBM_File::"),
8116                           STR_WITH_LEN("DB_File::"),
8117                           STR_WITH_LEN("GDBM_File::"),
8118                           STR_WITH_LEN("SDBM_File::"),
8119                           STR_WITH_LEN("ODBM_File::"),
8120                           NULL);
8121         LOP(OP_DBMOPEN,XTERM);
8122
8123     case KEY_dbmclose:
8124         UNI(OP_DBMCLOSE);
8125
8126     case KEY_dump:
8127         LOOPX(OP_DUMP);
8128
8129     case KEY_else:
8130         PREBLOCK(KW_ELSE);
8131
8132     case KEY_elsif:
8133         pl_yylval.ival = CopLINE(PL_curcop);
8134         OPERATOR(KW_ELSIF);
8135
8136     case KEY_eq:
8137         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8138             return REPORT(0);
8139         ChEop(OP_SEQ);
8140
8141     case KEY_exists:
8142         UNI(OP_EXISTS);
8143
8144     case KEY_exit:
8145         UNI(OP_EXIT);
8146
8147     case KEY_eval:
8148         s = skipspace(s);
8149         if (*s == '{') { /* block eval */
8150             PL_expect = XTERMBLOCK;
8151             UNIBRACK(OP_ENTERTRY);
8152         }
8153         else { /* string eval */
8154             PL_expect = XTERM;
8155             UNIBRACK(OP_ENTEREVAL);
8156         }
8157
8158     case KEY_evalbytes:
8159         PL_expect = XTERM;
8160         UNIBRACK(-OP_ENTEREVAL);
8161
8162     case KEY_eof:
8163         UNI(OP_EOF);
8164
8165     case KEY_exp:
8166         UNI(OP_EXP);
8167
8168     case KEY_each:
8169         UNI(OP_EACH);
8170
8171     case KEY_exec:
8172         LOP(OP_EXEC,XREF);
8173
8174     case KEY_endhostent:
8175         FUN0(OP_EHOSTENT);
8176
8177     case KEY_endnetent:
8178         FUN0(OP_ENETENT);
8179
8180     case KEY_endservent:
8181         FUN0(OP_ESERVENT);
8182
8183     case KEY_endprotoent:
8184         FUN0(OP_EPROTOENT);
8185
8186     case KEY_endpwent:
8187         FUN0(OP_EPWENT);
8188
8189     case KEY_endgrent:
8190         FUN0(OP_EGRENT);
8191
8192     case KEY_field:
8193         /* TODO: maybe this should use the same parser/grammar structures as
8194          * `my`, but it's also rather messy because of the `our` conflation
8195          */
8196         Perl_ck_warner_d(aTHX_
8197             packWARN(WARN_EXPERIMENTAL__CLASS), "field is experimental");
8198
8199         croak_kw_unless_class("field");
8200
8201         PL_parser->in_my = KEY_field;
8202         OPERATOR(KW_FIELD);
8203
8204     case KEY_finally:
8205         Perl_ck_warner_d(aTHX_
8206             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
8207         PREBLOCK(KW_FINALLY);
8208
8209     case KEY_for:
8210     case KEY_foreach:
8211         return yyl_foreach(aTHX_ s);
8212
8213     case KEY_formline:
8214         LOP(OP_FORMLINE,XTERM);
8215
8216     case KEY_fork:
8217         FUN0(OP_FORK);
8218
8219     case KEY_fc:
8220         UNI(OP_FC);
8221
8222     case KEY_fcntl:
8223         LOP(OP_FCNTL,XTERM);
8224
8225     case KEY_fileno:
8226         UNI(OP_FILENO);
8227
8228     case KEY_flock:
8229         LOP(OP_FLOCK,XTERM);
8230
8231     case KEY_gt:
8232         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8233             return REPORT(0);
8234         ChRop(OP_SGT);
8235
8236     case KEY_ge:
8237         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8238             return REPORT(0);
8239         ChRop(OP_SGE);
8240
8241     case KEY_grep:
8242         LOP(OP_GREPSTART, XREF);
8243
8244     case KEY_goto:
8245         LOOPX(OP_GOTO);
8246
8247     case KEY_gmtime:
8248         UNI(OP_GMTIME);
8249
8250     case KEY_getc:
8251         UNIDOR(OP_GETC);
8252
8253     case KEY_getppid:
8254         FUN0(OP_GETPPID);
8255
8256     case KEY_getpgrp:
8257         UNI(OP_GETPGRP);
8258
8259     case KEY_getpriority:
8260         LOP(OP_GETPRIORITY,XTERM);
8261
8262     case KEY_getprotobyname:
8263         UNI(OP_GPBYNAME);
8264
8265     case KEY_getprotobynumber:
8266         LOP(OP_GPBYNUMBER,XTERM);
8267
8268     case KEY_getprotoent:
8269         FUN0(OP_GPROTOENT);
8270
8271     case KEY_getpwent:
8272         FUN0(OP_GPWENT);
8273
8274     case KEY_getpwnam:
8275         UNI(OP_GPWNAM);
8276
8277     case KEY_getpwuid:
8278         UNI(OP_GPWUID);
8279
8280     case KEY_getpeername:
8281         UNI(OP_GETPEERNAME);
8282
8283     case KEY_gethostbyname:
8284         UNI(OP_GHBYNAME);
8285
8286     case KEY_gethostbyaddr:
8287         LOP(OP_GHBYADDR,XTERM);
8288
8289     case KEY_gethostent:
8290         FUN0(OP_GHOSTENT);
8291
8292     case KEY_getnetbyname:
8293         UNI(OP_GNBYNAME);
8294
8295     case KEY_getnetbyaddr:
8296         LOP(OP_GNBYADDR,XTERM);
8297
8298     case KEY_getnetent:
8299         FUN0(OP_GNETENT);
8300
8301     case KEY_getservbyname:
8302         LOP(OP_GSBYNAME,XTERM);
8303
8304     case KEY_getservbyport:
8305         LOP(OP_GSBYPORT,XTERM);
8306
8307     case KEY_getservent:
8308         FUN0(OP_GSERVENT);
8309
8310     case KEY_getsockname:
8311         UNI(OP_GETSOCKNAME);
8312
8313     case KEY_getsockopt:
8314         LOP(OP_GSOCKOPT,XTERM);
8315
8316     case KEY_getgrent:
8317         FUN0(OP_GGRENT);
8318
8319     case KEY_getgrnam:
8320         UNI(OP_GGRNAM);
8321
8322     case KEY_getgrgid:
8323         UNI(OP_GGRGID);
8324
8325     case KEY_getlogin:
8326         FUN0(OP_GETLOGIN);
8327
8328     case KEY_given:
8329         pl_yylval.ival = CopLINE(PL_curcop);
8330         Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__SMARTMATCH),
8331                          "given is deprecated");
8332         OPERATOR(KW_GIVEN);
8333
8334     case KEY_glob:
8335         LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
8336
8337     case KEY_hex:
8338         UNI(OP_HEX);
8339
8340     case KEY_if:
8341         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8342             return REPORT(0);
8343         pl_yylval.ival = CopLINE(PL_curcop);
8344         OPERATOR(KW_IF);
8345
8346     case KEY_index:
8347         LOP(OP_INDEX,XTERM);
8348
8349     case KEY_int:
8350         UNI(OP_INT);
8351
8352     case KEY_ioctl:
8353         LOP(OP_IOCTL,XTERM);
8354
8355     case KEY_isa:
8356         NCRop(OP_ISA);
8357
8358     case KEY_join:
8359         LOP(OP_JOIN,XTERM);
8360
8361     case KEY_keys:
8362         UNI(OP_KEYS);
8363
8364     case KEY_kill:
8365         LOP(OP_KILL,XTERM);
8366
8367     case KEY_last:
8368         LOOPX(OP_LAST);
8369
8370     case KEY_lc:
8371         UNI(OP_LC);
8372
8373     case KEY_lcfirst:
8374         UNI(OP_LCFIRST);
8375
8376     case KEY_local:
8377         OPERATOR(KW_LOCAL);
8378
8379     case KEY_length:
8380         UNI(OP_LENGTH);
8381
8382     case KEY_lt:
8383         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8384             return REPORT(0);
8385         ChRop(OP_SLT);
8386
8387     case KEY_le:
8388         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8389             return REPORT(0);
8390         ChRop(OP_SLE);
8391
8392     case KEY_localtime:
8393         UNI(OP_LOCALTIME);
8394
8395     case KEY_log:
8396         UNI(OP_LOG);
8397
8398     case KEY_link:
8399         LOP(OP_LINK,XTERM);
8400
8401     case KEY_listen:
8402         LOP(OP_LISTEN,XTERM);
8403
8404     case KEY_lock:
8405         UNI(OP_LOCK);
8406
8407     case KEY_lstat:
8408         UNI(OP_LSTAT);
8409
8410     case KEY_m:
8411         s = scan_pat(s,OP_MATCH);
8412         TERM(sublex_start());
8413
8414     case KEY_map:
8415         LOP(OP_MAPSTART, XREF);
8416
8417     case KEY_mkdir:
8418         LOP(OP_MKDIR,XTERM);
8419
8420     case KEY_msgctl:
8421         LOP(OP_MSGCTL,XTERM);
8422
8423     case KEY_msgget:
8424         LOP(OP_MSGGET,XTERM);
8425
8426     case KEY_msgrcv:
8427         LOP(OP_MSGRCV,XTERM);
8428
8429     case KEY_msgsnd:
8430         LOP(OP_MSGSND,XTERM);
8431
8432     case KEY_our:
8433     case KEY_my:
8434     case KEY_state:
8435         return yyl_my(aTHX_ s, key);
8436
8437     case KEY_next:
8438         LOOPX(OP_NEXT);
8439
8440     case KEY_ne:
8441         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8442             return REPORT(0);
8443         ChEop(OP_SNE);
8444
8445     case KEY_no:
8446         s = tokenize_use(0, s);
8447         TOKEN(KW_USE_or_NO);
8448
8449     case KEY_not:
8450         if (*s == '(' || (s = skipspace(s), *s == '('))
8451             FUN1(OP_NOT);
8452         else {
8453             if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8454                 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8455             OPERATOR(NOTOP);
8456         }
8457
8458     case KEY_open:
8459         s = skipspace(s);
8460         if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8461             const char *t;
8462             char *d = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8463             for (t=d; isSPACE(*t);)
8464                 t++;
8465             if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8466                 /* [perl #16184] */
8467                 && !(t[0] == '=' && t[1] == '>')
8468                 && !(t[0] == ':' && t[1] == ':')
8469                 && !keyword(s, d-s, 0)
8470             ) {
8471                 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8472                    "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8473                     UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8474             }
8475         }
8476         LOP(OP_OPEN,XTERM);
8477
8478     case KEY_or:
8479         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8480             return REPORT(0);
8481         pl_yylval.ival = OP_OR;
8482         OPERATOR(OROP);
8483
8484     case KEY_ord:
8485         UNI(OP_ORD);
8486
8487     case KEY_oct:
8488         UNI(OP_OCT);
8489
8490     case KEY_opendir:
8491         LOP(OP_OPEN_DIR,XTERM);
8492
8493     case KEY_print:
8494         checkcomma(s,PL_tokenbuf,"filehandle");
8495         LOP(OP_PRINT,XREF);
8496
8497     case KEY_printf:
8498         checkcomma(s,PL_tokenbuf,"filehandle");
8499         LOP(OP_PRTF,XREF);
8500
8501     case KEY_prototype:
8502         UNI(OP_PROTOTYPE);
8503
8504     case KEY_push:
8505         LOP(OP_PUSH,XTERM);
8506
8507     case KEY_pop:
8508         UNIDOR(OP_POP);
8509
8510     case KEY_pos:
8511         UNIDOR(OP_POS);
8512
8513     case KEY_pack:
8514         LOP(OP_PACK,XTERM);
8515
8516     case KEY_package:
8517         s = force_word(s,BAREWORD,FALSE,TRUE);
8518         s = skipspace(s);
8519         s = force_strict_version(s);
8520         PREBLOCK(KW_PACKAGE);
8521
8522     case KEY_pipe:
8523         LOP(OP_PIPE_OP,XTERM);
8524
8525     case KEY_q:
8526         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8527         if (!s)
8528             missingterm(NULL, 0);
8529         COPLINE_SET_FROM_MULTI_END;
8530         pl_yylval.ival = OP_CONST;
8531         TERM(sublex_start());
8532
8533     case KEY_quotemeta:
8534         UNI(OP_QUOTEMETA);
8535
8536     case KEY_qw:
8537         return yyl_qw(aTHX_ s, len);
8538
8539     case KEY_qq:
8540         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8541         if (!s)
8542             missingterm(NULL, 0);
8543         pl_yylval.ival = OP_STRINGIFY;
8544         if (SvIVX(PL_lex_stuff) == '\'')
8545             SvIV_set(PL_lex_stuff, 0);  /* qq'$foo' should interpolate */
8546         TERM(sublex_start());
8547
8548     case KEY_qr:
8549         s = scan_pat(s,OP_QR);
8550         TERM(sublex_start());
8551
8552     case KEY_qx:
8553         s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8554         if (!s)
8555             missingterm(NULL, 0);
8556         pl_yylval.ival = OP_BACKTICK;
8557         TERM(sublex_start());
8558
8559     case KEY_return:
8560         OLDLOP(OP_RETURN);
8561
8562     case KEY_require:
8563         return yyl_require(aTHX_ s, orig_keyword);
8564
8565     case KEY_reset:
8566         UNI(OP_RESET);
8567
8568     case KEY_redo:
8569         LOOPX(OP_REDO);
8570
8571     case KEY_rename:
8572         LOP(OP_RENAME,XTERM);
8573
8574     case KEY_rand:
8575         UNI(OP_RAND);
8576
8577     case KEY_rmdir:
8578         UNI(OP_RMDIR);
8579
8580     case KEY_rindex:
8581         LOP(OP_RINDEX,XTERM);
8582
8583     case KEY_read:
8584         LOP(OP_READ,XTERM);
8585
8586     case KEY_readdir:
8587         UNI(OP_READDIR);
8588
8589     case KEY_readline:
8590         UNIDOR(OP_READLINE);
8591
8592     case KEY_readpipe:
8593         UNIDOR(OP_BACKTICK);
8594
8595     case KEY_rewinddir:
8596         UNI(OP_REWINDDIR);
8597
8598     case KEY_recv:
8599         LOP(OP_RECV,XTERM);
8600
8601     case KEY_reverse:
8602         LOP(OP_REVERSE,XTERM);
8603
8604     case KEY_readlink:
8605         UNIDOR(OP_READLINK);
8606
8607     case KEY_ref:
8608         UNI(OP_REF);
8609
8610     case KEY_s:
8611         s = scan_subst(s);
8612         if (pl_yylval.opval)
8613             TERM(sublex_start());
8614         else
8615             TOKEN(1);   /* force error */
8616
8617     case KEY_say:
8618         checkcomma(s,PL_tokenbuf,"filehandle");
8619         LOP(OP_SAY,XREF);
8620
8621     case KEY_chomp:
8622         UNI(OP_CHOMP);
8623
8624     case KEY_scalar:
8625         UNI(OP_SCALAR);
8626
8627     case KEY_select:
8628         LOP(OP_SELECT,XTERM);
8629
8630     case KEY_seek:
8631         LOP(OP_SEEK,XTERM);
8632
8633     case KEY_semctl:
8634         LOP(OP_SEMCTL,XTERM);
8635
8636     case KEY_semget:
8637         LOP(OP_SEMGET,XTERM);
8638
8639     case KEY_semop:
8640         LOP(OP_SEMOP,XTERM);
8641
8642     case KEY_send:
8643         LOP(OP_SEND,XTERM);
8644
8645     case KEY_setpgrp:
8646         LOP(OP_SETPGRP,XTERM);
8647
8648     case KEY_setpriority:
8649         LOP(OP_SETPRIORITY,XTERM);
8650
8651     case KEY_sethostent:
8652         UNI(OP_SHOSTENT);
8653
8654     case KEY_setnetent:
8655         UNI(OP_SNETENT);
8656
8657     case KEY_setservent:
8658         UNI(OP_SSERVENT);
8659
8660     case KEY_setprotoent:
8661         UNI(OP_SPROTOENT);
8662
8663     case KEY_setpwent:
8664         FUN0(OP_SPWENT);
8665
8666     case KEY_setgrent:
8667         FUN0(OP_SGRENT);
8668
8669     case KEY_seekdir:
8670         LOP(OP_SEEKDIR,XTERM);
8671
8672     case KEY_setsockopt:
8673         LOP(OP_SSOCKOPT,XTERM);
8674
8675     case KEY_shift:
8676         UNIDOR(OP_SHIFT);
8677
8678     case KEY_shmctl:
8679         LOP(OP_SHMCTL,XTERM);
8680
8681     case KEY_shmget:
8682         LOP(OP_SHMGET,XTERM);
8683
8684     case KEY_shmread:
8685         LOP(OP_SHMREAD,XTERM);
8686
8687     case KEY_shmwrite:
8688         LOP(OP_SHMWRITE,XTERM);
8689
8690     case KEY_shutdown:
8691         LOP(OP_SHUTDOWN,XTERM);
8692
8693     case KEY_sin:
8694         UNI(OP_SIN);
8695
8696     case KEY_sleep:
8697         UNI(OP_SLEEP);
8698
8699     case KEY_socket:
8700         LOP(OP_SOCKET,XTERM);
8701
8702     case KEY_socketpair:
8703         LOP(OP_SOCKPAIR,XTERM);
8704
8705     case KEY_sort:
8706         checkcomma(s,PL_tokenbuf,"subroutine name");
8707         s = skipspace(s);
8708         PL_expect = XTERM;
8709         s = force_word(s,BAREWORD,TRUE,TRUE);
8710         LOP(OP_SORT,XREF);
8711
8712     case KEY_split:
8713         LOP(OP_SPLIT,XTERM);
8714
8715     case KEY_sprintf:
8716         LOP(OP_SPRINTF,XTERM);
8717
8718     case KEY_splice:
8719         LOP(OP_SPLICE,XTERM);
8720
8721     case KEY_sqrt:
8722         UNI(OP_SQRT);
8723
8724     case KEY_srand:
8725         UNI(OP_SRAND);
8726
8727     case KEY_stat:
8728         UNI(OP_STAT);
8729
8730     case KEY_study:
8731         UNI(OP_STUDY);
8732
8733     case KEY_substr:
8734         LOP(OP_SUBSTR,XTERM);
8735
8736     case KEY_method:
8737         /* For now we just treat 'method' identical to 'sub' plus a warning */
8738         Perl_ck_warner_d(aTHX_
8739             packWARN(WARN_EXPERIMENTAL__CLASS), "method is experimental");
8740         return yyl_sub(aTHX_ s, KEY_method);
8741
8742     case KEY_format:
8743     case KEY_sub:
8744         return yyl_sub(aTHX_ s, key);
8745
8746     case KEY_system:
8747         LOP(OP_SYSTEM,XREF);
8748
8749     case KEY_symlink:
8750         LOP(OP_SYMLINK,XTERM);
8751
8752     case KEY_syscall:
8753         LOP(OP_SYSCALL,XTERM);
8754
8755     case KEY_sysopen:
8756         LOP(OP_SYSOPEN,XTERM);
8757
8758     case KEY_sysseek:
8759         LOP(OP_SYSSEEK,XTERM);
8760
8761     case KEY_sysread:
8762         LOP(OP_SYSREAD,XTERM);
8763
8764     case KEY_syswrite:
8765         LOP(OP_SYSWRITE,XTERM);
8766
8767     case KEY_tr:
8768     case KEY_y:
8769         s = scan_trans(s);
8770         TERM(sublex_start());
8771
8772     case KEY_tell:
8773         UNI(OP_TELL);
8774
8775     case KEY_telldir:
8776         UNI(OP_TELLDIR);
8777
8778     case KEY_tie:
8779         LOP(OP_TIE,XTERM);
8780
8781     case KEY_tied:
8782         UNI(OP_TIED);
8783
8784     case KEY_time:
8785         FUN0(OP_TIME);
8786
8787     case KEY_times:
8788         FUN0(OP_TMS);
8789
8790     case KEY_truncate:
8791         LOP(OP_TRUNCATE,XTERM);
8792
8793     case KEY_try:
8794         pl_yylval.ival = CopLINE(PL_curcop);
8795         Perl_ck_warner_d(aTHX_
8796             packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
8797         PREBLOCK(KW_TRY);
8798
8799     case KEY_uc:
8800         UNI(OP_UC);
8801
8802     case KEY_ucfirst:
8803         UNI(OP_UCFIRST);
8804
8805     case KEY_untie:
8806         UNI(OP_UNTIE);
8807
8808     case KEY_until:
8809         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8810             return REPORT(0);
8811         pl_yylval.ival = CopLINE(PL_curcop);
8812         OPERATOR(KW_UNTIL);
8813
8814     case KEY_unless:
8815         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8816             return REPORT(0);
8817         pl_yylval.ival = CopLINE(PL_curcop);
8818         OPERATOR(KW_UNLESS);
8819
8820     case KEY_unlink:
8821         LOP(OP_UNLINK,XTERM);
8822
8823     case KEY_undef:
8824         UNIDOR(OP_UNDEF);
8825
8826     case KEY_unpack:
8827         LOP(OP_UNPACK,XTERM);
8828
8829     case KEY_utime:
8830         LOP(OP_UTIME,XTERM);
8831
8832     case KEY_umask:
8833         UNIDOR(OP_UMASK);
8834
8835     case KEY_unshift:
8836         LOP(OP_UNSHIFT,XTERM);
8837
8838     case KEY_use:
8839         s = tokenize_use(1, s);
8840         TOKEN(KW_USE_or_NO);
8841
8842     case KEY_values:
8843         UNI(OP_VALUES);
8844
8845     case KEY_vec:
8846         LOP(OP_VEC,XTERM);
8847
8848     case KEY_when:
8849         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8850             return REPORT(0);
8851         pl_yylval.ival = CopLINE(PL_curcop);
8852         Perl_ck_warner_d(aTHX_
8853             packWARN(WARN_DEPRECATED__SMARTMATCH),
8854             "when is deprecated");
8855         OPERATOR(KW_WHEN);
8856
8857     case KEY_while:
8858         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8859             return REPORT(0);
8860         pl_yylval.ival = CopLINE(PL_curcop);
8861         OPERATOR(KW_WHILE);
8862
8863     case KEY_warn:
8864         PL_hints |= HINT_BLOCK_SCOPE;
8865         LOP(OP_WARN,XTERM);
8866
8867     case KEY_wait:
8868         FUN0(OP_WAIT);
8869
8870     case KEY_waitpid:
8871         LOP(OP_WAITPID,XTERM);
8872
8873     case KEY_wantarray:
8874         FUN0(OP_WANTARRAY);
8875
8876     case KEY_write:
8877         /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8878          * we use the same number on EBCDIC */
8879         gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8880         UNI(OP_ENTERWRITE);
8881
8882     case KEY_x:
8883         if (PL_expect == XOPERATOR) {
8884             if (*s == '=' && !PL_lex_allbrackets
8885                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8886             {
8887                 return REPORT(0);
8888             }
8889             Mop(OP_REPEAT);
8890         }
8891         check_uni();
8892         return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8893
8894     case KEY_xor:
8895         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8896             return REPORT(0);
8897         pl_yylval.ival = OP_XOR;
8898         OPERATOR(OROP);
8899     }
8900 }
8901
8902 static int
8903 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8904 {
8905     I32 key = 0;
8906     I32 orig_keyword = 0;
8907     STRLEN olen = len;
8908     char *d = s;
8909     s += 2;
8910     s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8911     if ((*s == ':' && s[1] == ':')
8912         || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8913     {
8914         Copy(PL_bufptr, PL_tokenbuf, olen, char);
8915         return yyl_just_a_word(aTHX_ d, olen, 0, c);
8916     }
8917     if (!key)
8918         Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8919                           UTF8fARG(UTF, len, PL_tokenbuf));
8920     if (key < 0)
8921         key = -key;
8922     else if (key == KEY_require || key == KEY_do
8923           || key == KEY_glob)
8924         /* that's a way to remember we saw "CORE::" */
8925         orig_keyword = key;
8926
8927     /* Known to be a reserved word at this point */
8928     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8929 }
8930
8931 struct Perl_custom_infix_result {
8932     struct Perl_custom_infix *def;
8933     SV                       *parsedata;
8934 };
8935
8936 static enum yytokentype tokentype_for_plugop(struct Perl_custom_infix *def)
8937 {
8938     enum Perl_custom_infix_precedence prec = def->prec;
8939     if(prec <= INFIX_PREC_LOW)
8940         return PLUGIN_LOW_OP;
8941     if(prec <= INFIX_PREC_LOGICAL_OR_LOW)
8942         return PLUGIN_LOGICAL_OR_LOW_OP;
8943     if(prec <= INFIX_PREC_LOGICAL_AND_LOW)
8944         return PLUGIN_LOGICAL_AND_LOW_OP;
8945     if(prec <= INFIX_PREC_ASSIGN)
8946         return PLUGIN_ASSIGN_OP;
8947     if(prec <= INFIX_PREC_LOGICAL_OR)
8948         return PLUGIN_LOGICAL_OR_OP;
8949     if(prec <= INFIX_PREC_LOGICAL_AND)
8950         return PLUGIN_LOGICAL_AND_OP;
8951     if(prec <= INFIX_PREC_REL)
8952         return PLUGIN_REL_OP;
8953     if(prec <= INFIX_PREC_ADD)
8954         return PLUGIN_ADD_OP;
8955     if(prec <= INFIX_PREC_MUL)
8956         return PLUGIN_MUL_OP;
8957     if(prec <= INFIX_PREC_POW)
8958         return PLUGIN_POW_OP;
8959     return PLUGIN_HIGH_OP;
8960 }
8961
8962 OP *
8963 Perl_build_infix_plugin(pTHX_ OP *lhs, OP *rhs, void *tokendata)
8964 {
8965     PERL_ARGS_ASSERT_BUILD_INFIX_PLUGIN;
8966
8967     struct Perl_custom_infix_result *result = (struct Perl_custom_infix_result *)tokendata;
8968     SAVEFREEPV(result);
8969     if(result->parsedata)
8970         SAVEFREESV(result->parsedata);
8971
8972     return (*result->def->build_op)(aTHX_
8973         &result->parsedata, lhs, rhs, result->def);
8974 }
8975
8976 static int
8977 yyl_keylookup(pTHX_ char *s, GV *gv)
8978 {
8979     STRLEN len;
8980     bool anydelim;
8981     I32 key;
8982     struct code c = no_code;
8983     I32 orig_keyword = 0;
8984     char *d;
8985
8986     c.gv = gv;
8987
8988     PL_bufptr = s;
8989     s = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len, FALSE);
8990
8991     /* Some keywords can be followed by any delimiter, including ':' */
8992     anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8993
8994     /* x::* is just a word, unless x is "CORE" */
8995     if (!anydelim && *s == ':' && s[1] == ':') {
8996         if (memEQs(PL_tokenbuf, len, "CORE"))
8997             return yyl_key_core(aTHX_ s, len, c);
8998         return yyl_just_a_word(aTHX_ s, len, 0, c);
8999     }
9000
9001     d = s;
9002     while (d < PL_bufend && isSPACE(*d))
9003             d++;        /* no comments skipped here, or s### is misparsed */
9004
9005     /* Is this a word before a => operator? */
9006     if (*d == '=' && d[1] == '>') {
9007         return yyl_fatcomma(aTHX_ s, len);
9008     }
9009
9010     /* Check for plugged-in keyword */
9011     {
9012         OP *o;
9013         int result;
9014         char *saved_bufptr = PL_bufptr;
9015         PL_bufptr = s;
9016         result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
9017         s = PL_bufptr;
9018         if (result == KEYWORD_PLUGIN_DECLINE) {
9019             /* not a plugged-in keyword */
9020             PL_bufptr = saved_bufptr;
9021         } else if (result == KEYWORD_PLUGIN_STMT) {
9022             pl_yylval.opval = o;
9023             CLINE;
9024             if (!PL_nexttoke) PL_expect = XSTATE;
9025             return REPORT(PLUGSTMT);
9026         } else if (result == KEYWORD_PLUGIN_EXPR) {
9027             pl_yylval.opval = o;
9028             CLINE;
9029             if (!PL_nexttoke) PL_expect = XOPERATOR;
9030             return REPORT(PLUGEXPR);
9031         } else {
9032             Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
9033         }
9034     }
9035
9036     /* Check for plugged-in named operator */
9037     if(PLUGINFIX_IS_ENABLED) {
9038         struct Perl_custom_infix *def;
9039         STRLEN result;
9040         result = PL_infix_plugin(aTHX_ PL_tokenbuf, len, &def);
9041         if(result) {
9042             if(result != len)
9043                 Perl_croak(aTHX_ "Bad infix plugin result (%zd) - did not consume entire identifier <%s>\n",
9044                     result, PL_tokenbuf);
9045             PL_bufptr = s = d;
9046             struct Perl_custom_infix_result *result;
9047             Newx(result, 1, struct Perl_custom_infix_result);
9048             result->def = def;
9049             result->parsedata = NULL;
9050             if(def->parse) {
9051                 (*def->parse)(aTHX_ &result->parsedata, def);
9052                 s = PL_bufptr; /* restore local s variable */
9053             }
9054             pl_yylval.pval = result;
9055             CLINE;
9056             OPERATOR(tokentype_for_plugop(def));
9057         }
9058     }
9059
9060     /* Is this a label? */
9061     if (!anydelim && PL_expect == XSTATE
9062           && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
9063         s = d + 1;
9064         pl_yylval.opval =
9065             newSVOP(OP_CONST, 0,
9066                 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
9067         CLINE;
9068         TOKEN(LABEL);
9069     }
9070
9071     /* Check for lexical sub */
9072     if (PL_expect != XOPERATOR) {
9073         char tmpbuf[sizeof PL_tokenbuf + 1];
9074         *tmpbuf = '&';
9075         Copy(PL_tokenbuf, tmpbuf+1, len, char);
9076         c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
9077         if (c.off != NOT_IN_PAD) {
9078             assert(c.off); /* we assume this is boolean-true below */
9079             if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
9080                 HV *  const stash = PAD_COMPNAME_OURSTASH(c.off);
9081                 HEK * const stashname = HvNAME_HEK(stash);
9082                 c.sv = newSVhek(stashname);
9083                 sv_catpvs(c.sv, "::");
9084                 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
9085                                 (UTF ? SV_CATUTF8 : SV_CATBYTES));
9086                 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
9087                                   SVt_PVCV);
9088                 c.off = 0;
9089                 if (!c.gv) {
9090                     ASSUME(c.sv && SvREFCNT(c.sv) == 1);
9091                     SvREFCNT_dec(c.sv);
9092                     c.sv = NULL;
9093                     return yyl_just_a_word(aTHX_ s, len, 0, c);
9094                 }
9095             }
9096             else {
9097                 c.rv2cv_op = newOP(OP_PADANY, 0);
9098                 c.rv2cv_op->op_targ = c.off;
9099                 c.cv = find_lexical_cv(c.off);
9100             }
9101             c.lex = TRUE;
9102             return yyl_just_a_word(aTHX_ s, len, 0, c);
9103         }
9104         c.off = 0;
9105     }
9106
9107     /* Check for built-in keyword */
9108     key = keyword(PL_tokenbuf, len, 0);
9109
9110     if (key < 0)
9111         key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
9112
9113     if (key && key != KEY___DATA__ && key != KEY___END__
9114      && (!anydelim || *s != '#')) {
9115         /* no override, and not s### either; skipspace is safe here
9116          * check for => on following line */
9117         bool arrow;
9118         STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
9119         STRLEN   soff = s         - SvPVX(PL_linestr);
9120         s = peekspace(s);
9121         arrow = *s == '=' && s[1] == '>';
9122         PL_bufptr = SvPVX(PL_linestr) + bufoff;
9123         s         = SvPVX(PL_linestr) +   soff;
9124         if (arrow)
9125             return yyl_fatcomma(aTHX_ s, len);
9126     }
9127
9128     return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
9129 }
9130
9131 static int
9132 yyl_try(pTHX_ char *s)
9133 {
9134     char *d;
9135     GV *gv = NULL;
9136     int tok;
9137
9138   retry:
9139     /* Check for plugged-in symbolic operator */
9140     if(PLUGINFIX_IS_ENABLED && isPLUGINFIX_FIRST(*s)) {
9141         struct Perl_custom_infix *def;
9142         char *s_end = s, *d = PL_tokenbuf;
9143         STRLEN len;
9144
9145         /* Copy the longest sequence of isPLUGINFIX() chars into PL_tokenbuf */
9146         while(s_end < PL_bufend && d < PL_tokenbuf+sizeof(PL_tokenbuf)-1 && isPLUGINFIX(*s_end))
9147             *d++ = *s_end++;
9148         *d = '\0';
9149
9150         if((len = (*PL_infix_plugin)(aTHX_ PL_tokenbuf, s_end - s, &def))) {
9151             s += len;
9152             struct Perl_custom_infix_result *result;
9153             Newx(result, 1, struct Perl_custom_infix_result);
9154             result->def = def;
9155             result->parsedata = NULL;
9156             if(def->parse) {
9157                 PL_bufptr = s;
9158                 (*def->parse)(aTHX_ &result->parsedata, def);
9159                 s = PL_bufptr; /* restore local s variable */
9160             }
9161             pl_yylval.pval = result;
9162             CLINE;
9163             OPERATOR(tokentype_for_plugop(def));
9164         }
9165     }
9166
9167     switch (*s) {
9168     default:
9169         if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
9170             if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9171                 return tok;
9172             goto retry_bufptr;
9173         }
9174         yyl_croak_unrecognised(aTHX_ s);
9175
9176     case 4:
9177     case 26:
9178         /* emulate EOF on ^D or ^Z */
9179         if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
9180             return tok;
9181     retry_bufptr:
9182         s = PL_bufptr;
9183         goto retry;
9184
9185     case 0:
9186         if ((!PL_rsfp || PL_lex_inwhat)
9187          && (!PL_parser->filtered || s+1 < PL_bufend)) {
9188             PL_last_uni = 0;
9189             PL_last_lop = 0;
9190             if (PL_lex_brackets
9191                 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
9192             {
9193                 yyerror((const char *)
9194                         (PL_lex_formbrack
9195                          ? "Format not terminated"
9196                          : "Missing right curly or square bracket"));
9197             }
9198             DEBUG_T({
9199                 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
9200             });
9201             TOKEN(0);
9202         }
9203         if (s++ < PL_bufend)
9204             goto retry;  /* ignore stray nulls */
9205         PL_last_uni = 0;
9206         PL_last_lop = 0;
9207         if (!PL_in_eval && !PL_preambled) {
9208             PL_preambled = TRUE;
9209             if (PL_perldb) {
9210                 /* Generate a string of Perl code to load the debugger.
9211                  * If PERL5DB is set, it will return the contents of that,
9212                  * otherwise a compile-time require of perl5db.pl.  */
9213
9214                 const char * const pdb = PerlEnv_getenv("PERL5DB");
9215
9216                 if (pdb) {
9217                     sv_setpv(PL_linestr, pdb);
9218                     sv_catpvs(PL_linestr,";");
9219                 } else {
9220                     SETERRNO(0,SS_NORMAL);
9221                     sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
9222                 }
9223                 PL_parser->preambling = CopLINE(PL_curcop);
9224             } else
9225                 SvPVCLEAR(PL_linestr);
9226             if (PL_preambleav) {
9227                 SV **svp = AvARRAY(PL_preambleav);
9228                 SV **const end = svp + AvFILLp(PL_preambleav);
9229                 while(svp <= end) {
9230                     sv_catsv(PL_linestr, *svp);
9231                     ++svp;
9232                     sv_catpvs(PL_linestr, ";");
9233                 }
9234                 SvREFCNT_dec(MUTABLE_SV(PL_preambleav));
9235                 PL_preambleav = NULL;
9236             }
9237             if (PL_minus_E)
9238                 sv_catpvs(PL_linestr,
9239                           "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
9240             if (PL_minus_n || PL_minus_p) {
9241                 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
9242                 if (PL_minus_l)
9243                     sv_catpvs(PL_linestr,"chomp;");
9244                 if (PL_minus_a) {
9245                     if (PL_minus_F) {
9246                         if (   (   *PL_splitstr == '/'
9247                                 || *PL_splitstr == '\''
9248                                 || *PL_splitstr == '"')
9249                             && strchr(PL_splitstr + 1, *PL_splitstr))
9250                         {
9251                             /* strchr is ok, because -F pattern can't contain
9252                              * embedded NULs */
9253                             Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
9254                         }
9255                         else {
9256                             /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
9257                                bytes can be used as quoting characters.  :-) */
9258                             const char *splits = PL_splitstr;
9259                             sv_catpvs(PL_linestr, "our @F=split(q\0");
9260                             do {
9261                                 /* Need to \ \s  */
9262                                 if (*splits == '\\')
9263                                     sv_catpvn(PL_linestr, splits, 1);
9264                                 sv_catpvn(PL_linestr, splits, 1);
9265                             } while (*splits++);
9266                             /* This loop will embed the trailing NUL of
9267                                PL_linestr as the last thing it does before
9268                                terminating.  */
9269                             sv_catpvs(PL_linestr, ");");
9270                         }
9271                     }
9272                     else
9273                         sv_catpvs(PL_linestr,"our @F=split(' ');");
9274                 }
9275             }
9276             sv_catpvs(PL_linestr, "\n");
9277             PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
9278             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9279             PL_last_lop = PL_last_uni = NULL;
9280             if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
9281                 update_debugger_info(PL_linestr, NULL, 0);
9282             goto retry;
9283         }
9284         if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
9285             return tok;
9286         goto retry_bufptr;
9287
9288     case '\r':
9289 #ifdef PERL_STRICT_CR
9290         Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
9291         Perl_croak(aTHX_
9292       "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
9293 #endif
9294     case ' ': case '\t': case '\f': case '\v':
9295         s++;
9296         goto retry;
9297
9298     case '#':
9299     case '\n': {
9300         const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
9301         if (needs_semicolon)
9302             TOKEN(PERLY_SEMICOLON);
9303         else
9304             goto retry;
9305     }
9306
9307     case '-':
9308         return yyl_hyphen(aTHX_ s);
9309
9310     case '+':
9311         return yyl_plus(aTHX_ s);
9312
9313     case '*':
9314         return yyl_star(aTHX_ s);
9315
9316     case '%':
9317         return yyl_percent(aTHX_ s);
9318
9319     case '^':
9320         return yyl_caret(aTHX_ s);
9321
9322     case '[':
9323         return yyl_leftsquare(aTHX_ s);
9324
9325     case '~':
9326         return yyl_tilde(aTHX_ s);
9327
9328     case ',':
9329         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9330             TOKEN(0);
9331         s++;
9332         OPERATOR(PERLY_COMMA);
9333     case ':':
9334         if (s[1] == ':')
9335             return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
9336         return yyl_colon(aTHX_ s + 1);
9337
9338     case '(':
9339         return yyl_leftparen(aTHX_ s + 1);
9340
9341     case ';':
9342         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
9343             TOKEN(0);
9344         CLINE;
9345         s++;
9346         PL_expect = XSTATE;
9347         TOKEN(PERLY_SEMICOLON);
9348
9349     case ')':
9350         return yyl_rightparen(aTHX_ s);
9351
9352     case ']':
9353         return yyl_rightsquare(aTHX_ s);
9354
9355     case '{':
9356         return yyl_leftcurly(aTHX_ s + 1, 0);
9357
9358     case '}':
9359         if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
9360             TOKEN(0);
9361         return yyl_rightcurly(aTHX_ s, 0);
9362
9363     case '&':
9364         return yyl_ampersand(aTHX_ s);
9365
9366     case '|':
9367         return yyl_verticalbar(aTHX_ s);
9368
9369     case '=':
9370         if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
9371             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
9372         {
9373             s = vcs_conflict_marker(s + 7);
9374             goto retry;
9375         }
9376
9377         s++;
9378         {
9379             const char tmp = *s++;
9380             if (tmp == '=') {
9381                 if (!PL_lex_allbrackets
9382                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
9383                 {
9384                     s -= 2;
9385                     TOKEN(0);
9386                 }
9387                 ChEop(OP_EQ);
9388             }
9389             if (tmp == '>') {
9390                 if (!PL_lex_allbrackets
9391                     && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9392                 {
9393                     s -= 2;
9394                     TOKEN(0);
9395                 }
9396                 OPERATOR(PERLY_COMMA);
9397             }
9398             if (tmp == '~')
9399                 PMop(OP_MATCH);
9400             if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
9401                 && memCHRs("+-*/%.^&|<",tmp))
9402                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9403                             "Reversed %c= operator",(int)tmp);
9404             s--;
9405             if (PL_expect == XSTATE
9406                 && isALPHA(tmp)
9407                 && (s == PL_linestart+1 || s[-2] == '\n') )
9408             {
9409                 if (   (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
9410                     || PL_lex_state != LEX_NORMAL)
9411                 {
9412                     d = PL_bufend;
9413                     while (s < d) {
9414                         if (*s++ == '\n') {
9415                             incline(s, PL_bufend);
9416                             if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
9417                             {
9418                                 s = (char *) memchr(s,'\n', d - s);
9419                                 if (s)
9420                                     s++;
9421                                 else
9422                                     s = d;
9423                                 incline(s, PL_bufend);
9424                                 goto retry;
9425                             }
9426                         }
9427                     }
9428                     goto retry;
9429                 }
9430                 s = PL_bufend;
9431                 PL_parser->in_pod = 1;
9432                 goto retry;
9433             }
9434         }
9435         if (PL_expect == XBLOCK) {
9436             const char *t = s;
9437 #ifdef PERL_STRICT_CR
9438             while (SPACE_OR_TAB(*t))
9439 #else
9440             while (SPACE_OR_TAB(*t) || *t == '\r')
9441 #endif
9442                 t++;
9443             if (*t == '\n' || *t == '#') {
9444                 ENTER_with_name("lex_format");
9445                 SAVEI8(PL_parser->form_lex_state);
9446                 SAVEI32(PL_lex_formbrack);
9447                 PL_parser->form_lex_state = PL_lex_state;
9448                 PL_lex_formbrack = PL_lex_brackets + 1;
9449                 PL_parser->sub_error_count = PL_error_count;
9450                 return yyl_leftcurly(aTHX_ s, 1);
9451             }
9452         }
9453         if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
9454             s--;
9455             TOKEN(0);
9456         }
9457         pl_yylval.ival = 0;
9458         OPERATOR(ASSIGNOP);
9459
9460         case '!':
9461         return yyl_bang(aTHX_ s + 1);
9462
9463     case '<':
9464         if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
9465             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
9466         {
9467             s = vcs_conflict_marker(s + 7);
9468             goto retry;
9469         }
9470         return yyl_leftpointy(aTHX_ s);
9471
9472     case '>':
9473         if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
9474             && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
9475         {
9476             s = vcs_conflict_marker(s + 7);
9477             goto retry;
9478         }
9479         return yyl_rightpointy(aTHX_ s + 1);
9480
9481     case '$':
9482         return yyl_dollar(aTHX_ s);
9483
9484     case '@':
9485         return yyl_snail(aTHX_ s);
9486
9487     case '/':                   /* may be division, defined-or, or pattern */
9488         return yyl_slash(aTHX_ s);
9489
9490      case '?':                  /* conditional */
9491         s++;
9492         if (!PL_lex_allbrackets
9493             && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9494         {
9495             s--;
9496             TOKEN(0);
9497         }
9498         PL_lex_allbrackets++;
9499         OPERATOR(PERLY_QUESTION_MARK);
9500
9501     case '.':
9502         if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9503 #ifdef PERL_STRICT_CR
9504             && s[1] == '\n'
9505 #else
9506             && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9507 #endif
9508             && (s == PL_linestart || s[-1] == '\n') )
9509         {
9510             PL_expect = XSTATE;
9511             /* formbrack==2 means dot seen where arguments expected */
9512             return yyl_rightcurly(aTHX_ s, 2);
9513         }
9514         if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9515             s += 3;
9516             OPERATOR(YADAYADA);
9517         }
9518         if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9519             char tmp = *s++;
9520             if (*s == tmp) {
9521                 if (!PL_lex_allbrackets
9522                     && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9523                 {
9524                     s--;
9525                     TOKEN(0);
9526                 }
9527                 s++;
9528                 if (*s == tmp) {
9529                     s++;
9530                     pl_yylval.ival = OPf_SPECIAL;
9531                 }
9532                 else
9533                     pl_yylval.ival = 0;
9534                 OPERATOR(DOTDOT);
9535             }
9536             if (*s == '=' && !PL_lex_allbrackets
9537                 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9538             {
9539                 s--;
9540                 TOKEN(0);
9541             }
9542             Aop(OP_CONCAT);
9543         }
9544         /* FALLTHROUGH */
9545     case '0': case '1': case '2': case '3': case '4':
9546     case '5': case '6': case '7': case '8': case '9':
9547         s = scan_num(s, &pl_yylval);
9548         DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9549         if (PL_expect == XOPERATOR)
9550             no_op("Number",s);
9551         TERM(THING);
9552
9553     case '\'':
9554         return yyl_sglquote(aTHX_ s);
9555
9556     case '"':
9557         return yyl_dblquote(aTHX_ s);
9558
9559     case '`':
9560         return yyl_backtick(aTHX_ s);
9561
9562     case '\\':
9563         return yyl_backslash(aTHX_ s + 1);
9564
9565     case 'v':
9566         if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9567             char *start = s + 2;
9568             while (isDIGIT(*start) || *start == '_')
9569                 start++;
9570             if (*start == '.' && isDIGIT(start[1])) {
9571                 s = scan_num(s, &pl_yylval);
9572                 TERM(THING);
9573             }
9574             else if ((*start == ':' && start[1] == ':')
9575                      || (PL_expect == XSTATE && *start == ':')) {
9576                 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9577                     return tok;
9578                 goto retry_bufptr;
9579             }
9580             else if (PL_expect == XSTATE) {
9581                 d = start;
9582                 while (d < PL_bufend && isSPACE(*d)) d++;
9583                 if (*d == ':') {
9584                     if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9585                         return tok;
9586                     goto retry_bufptr;
9587                 }
9588             }
9589             /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9590             if (!isALPHA(*start) && (PL_expect == XTERM
9591                         || PL_expect == XREF || PL_expect == XSTATE
9592                         || PL_expect == XTERMORDORDOR)) {
9593                 GV *const gv = gv_fetchpvn_flags(s, start - s,
9594                                                     UTF ? SVf_UTF8 : 0, SVt_PVCV);
9595                 if (!gv) {
9596                     s = scan_num(s, &pl_yylval);
9597                     TERM(THING);
9598                 }
9599             }
9600         }
9601         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9602             return tok;
9603         goto retry_bufptr;
9604
9605     case 'x':
9606         if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9607             s++;
9608             Mop(OP_REPEAT);
9609         }
9610         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9611             return tok;
9612         goto retry_bufptr;
9613
9614     case '_':
9615     case 'a': case 'A':
9616     case 'b': case 'B':
9617     case 'c': case 'C':
9618     case 'd': case 'D':
9619     case 'e': case 'E':
9620     case 'f': case 'F':
9621     case 'g': case 'G':
9622     case 'h': case 'H':
9623     case 'i': case 'I':
9624     case 'j': case 'J':
9625     case 'k': case 'K':
9626     case 'l': case 'L':
9627     case 'm': case 'M':
9628     case 'n': case 'N':
9629     case 'o': case 'O':
9630     case 'p': case 'P':
9631     case 'q': case 'Q':
9632     case 'r': case 'R':
9633     case 's': case 'S':
9634     case 't': case 'T':
9635     case 'u': case 'U':
9636               case 'V':
9637     case 'w': case 'W':
9638               case 'X':
9639     case 'y': case 'Y':
9640     case 'z': case 'Z':
9641         if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9642             return tok;
9643         goto retry_bufptr;
9644     }
9645 }
9646
9647
9648 /*
9649   yylex
9650
9651   Works out what to call the token just pulled out of the input
9652   stream.  The yacc parser takes care of taking the ops we return and
9653   stitching them into a tree.
9654
9655   Returns:
9656     The type of the next token
9657
9658   Structure:
9659       Check if we have already built the token; if so, use it.
9660       Switch based on the current state:
9661           - if we have a case modifier in a string, deal with that
9662           - handle other cases of interpolation inside a string
9663           - scan the next line if we are inside a format
9664       In the normal state, switch on the next character:
9665           - default:
9666             if alphabetic, go to key lookup
9667             unrecognized character - croak
9668           - 0/4/26: handle end-of-line or EOF
9669           - cases for whitespace
9670           - \n and #: handle comments and line numbers
9671           - various operators, brackets and sigils
9672           - numbers
9673           - quotes
9674           - 'v': vstrings (or go to key lookup)
9675           - 'x' repetition operator (or go to key lookup)
9676           - other ASCII alphanumerics (key lookup begins here):
9677               word before => ?
9678               keyword plugin
9679               scan built-in keyword (but do nothing with it yet)
9680               check for statement label
9681               check for lexical subs
9682                   return yyl_just_a_word if there is one
9683               see whether built-in keyword is overridden
9684               switch on keyword number:
9685                   - default: return yyl_just_a_word:
9686                       not a built-in keyword; handle bareword lookup
9687                       disambiguate between method and sub call
9688                       fall back to bareword
9689                   - cases for built-in keywords
9690 */
9691
9692 int
9693 Perl_yylex(pTHX)
9694 {
9695     char *s = PL_bufptr;
9696
9697     if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9698         const U8* first_bad_char_loc;
9699         if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9700                                                         PL_bufend - PL_bufptr,
9701                                                         &first_bad_char_loc)))
9702         {
9703             _force_out_malformed_utf8_message(first_bad_char_loc,
9704                                               (U8 *) PL_bufend,
9705                                               0,
9706                                               1 /* 1 means die */ );
9707             NOT_REACHED; /* NOTREACHED */
9708         }
9709         PL_parser->recheck_utf8_validity = FALSE;
9710     }
9711     DEBUG_T( {
9712         SV* tmp = newSVpvs("");
9713         PerlIO_printf(Perl_debug_log, "### %" LINE_Tf ":LEX_%s/X%s %s\n",
9714             CopLINE(PL_curcop),
9715             lex_state_names[PL_lex_state],
9716             exp_name[PL_expect],
9717             pv_display(tmp, s, strlen(s), 0, 60));
9718         SvREFCNT_dec(tmp);
9719     } );
9720
9721     /* when we've already built the next token, just pull it out of the queue */
9722     if (PL_nexttoke) {
9723         PL_nexttoke--;
9724         pl_yylval = PL_nextval[PL_nexttoke];
9725         {
9726             I32 next_type;
9727             next_type = PL_nexttype[PL_nexttoke];
9728             if (next_type & (7<<24)) {
9729                 if (next_type & (1<<24)) {
9730                     if (PL_lex_brackets > 100)
9731                         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9732                     PL_lex_brackstack[PL_lex_brackets++] =
9733                         (char) ((U8) (next_type >> 16));
9734                 }
9735                 if (next_type & (2<<24))
9736                     PL_lex_allbrackets++;
9737                 if (next_type & (4<<24))
9738                     PL_lex_allbrackets--;
9739                 next_type &= 0xffff;
9740             }
9741             return REPORT(next_type == 'p' ? pending_ident() : next_type);
9742         }
9743     }
9744
9745     switch (PL_lex_state) {
9746     case LEX_NORMAL:
9747     case LEX_INTERPNORMAL:
9748         break;
9749
9750     /* interpolated case modifiers like \L \U, including \Q and \E.
9751        when we get here, PL_bufptr is at the \
9752     */
9753     case LEX_INTERPCASEMOD:
9754         /* handle \E or end of string */
9755         return yyl_interpcasemod(aTHX_ s);
9756
9757     case LEX_INTERPPUSH:
9758         return REPORT(sublex_push());
9759
9760     case LEX_INTERPSTART:
9761         if (PL_bufptr == PL_bufend)
9762             return REPORT(sublex_done());
9763         DEBUG_T({
9764             if(*PL_bufptr != '(')
9765                 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9766         });
9767         PL_expect = XTERM;
9768         /* for /@a/, we leave the joining for the regex engine to do
9769          * (unless we're within \Q etc) */
9770         PL_lex_dojoin = (*PL_bufptr == '@'
9771                             && (!PL_lex_inpat || PL_lex_casemods));
9772         PL_lex_state = LEX_INTERPNORMAL;
9773         if (PL_lex_dojoin) {
9774             NEXTVAL_NEXTTOKE.ival = 0;
9775             force_next(PERLY_COMMA);
9776             force_ident("\"", PERLY_DOLLAR);
9777             NEXTVAL_NEXTTOKE.ival = 0;
9778             force_next(PERLY_DOLLAR);
9779             NEXTVAL_NEXTTOKE.ival = 0;
9780             force_next((2<<24)|PERLY_PAREN_OPEN);
9781             NEXTVAL_NEXTTOKE.ival = OP_JOIN;    /* emulate join($", ...) */
9782             force_next(FUNC);
9783         }
9784         /* Convert (?{...}) or (*{...}) and friends to 'do {...}' */
9785         if (PL_lex_inpat && *PL_bufptr == '(') {
9786             PL_parser->lex_shared->re_eval_start = PL_bufptr;
9787             PL_bufptr += 2;
9788             if (*PL_bufptr != '{')
9789                 PL_bufptr++;
9790             PL_expect = XTERMBLOCK;
9791             force_next(KW_DO);
9792         }
9793
9794         if (PL_lex_starts++) {
9795             s = PL_bufptr;
9796             /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9797             if (!PL_lex_casemods && PL_lex_inpat)
9798                 TOKEN(PERLY_COMMA);
9799             else
9800                 AopNOASSIGN(OP_CONCAT);
9801         }
9802         return yylex();
9803
9804     case LEX_INTERPENDMAYBE:
9805         if (intuit_more(PL_bufptr, PL_bufend)) {
9806             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
9807             break;
9808         }
9809         /* FALLTHROUGH */
9810
9811     case LEX_INTERPEND:
9812         if (PL_lex_dojoin) {
9813             const U8 dojoin_was = PL_lex_dojoin;
9814             PL_lex_dojoin = FALSE;
9815             PL_lex_state = LEX_INTERPCONCAT;
9816             PL_lex_allbrackets--;
9817             return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
9818         }
9819         if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9820             && SvEVALED(PL_lex_repl))
9821         {
9822             if (PL_bufptr != PL_bufend)
9823                 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9824             PL_lex_repl = NULL;
9825         }
9826         /* Paranoia.  re_eval_start is adjusted when S_scan_heredoc sets
9827            re_eval_str.  If the here-doc body's length equals the previous
9828            value of re_eval_start, re_eval_start will now be null.  So
9829            check re_eval_str as well. */
9830         if (PL_parser->lex_shared->re_eval_start
9831          || PL_parser->lex_shared->re_eval_str) {
9832             SV *sv;
9833             if (*PL_bufptr != ')')
9834                 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9835             PL_bufptr++;
9836             /* having compiled a (?{..}) expression, return the original
9837              * text too, as a const */
9838             if (PL_parser->lex_shared->re_eval_str) {
9839                 sv = PL_parser->lex_shared->re_eval_str;
9840                 PL_parser->lex_shared->re_eval_str = NULL;
9841                 SvCUR_set(sv,
9842                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9843                 SvPV_shrink_to_cur(sv);
9844             }
9845             else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9846                          PL_bufptr - PL_parser->lex_shared->re_eval_start);
9847             NEXTVAL_NEXTTOKE.opval =
9848                     newSVOP(OP_CONST, 0,
9849                                  sv);
9850             force_next(THING);
9851             PL_parser->lex_shared->re_eval_start = NULL;
9852             PL_expect = XTERM;
9853             return REPORT(PERLY_COMMA);
9854         }
9855
9856         /* FALLTHROUGH */
9857     case LEX_INTERPCONCAT:
9858 #ifdef DEBUGGING
9859         if (PL_lex_brackets)
9860             Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9861                        (long) PL_lex_brackets);
9862 #endif
9863         if (PL_bufptr == PL_bufend)
9864             return REPORT(sublex_done());
9865
9866         /* m'foo' still needs to be parsed for possible (?{...}) */
9867         if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9868             SV *sv = newSVsv(PL_linestr);
9869             sv = tokeq(sv);
9870             pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9871             s = PL_bufend;
9872         }
9873         else {
9874             int save_error_count = PL_error_count;
9875
9876             s = scan_const(PL_bufptr);
9877
9878             /* Set flag if this was a pattern and there were errors.  op.c will
9879              * refuse to compile a pattern with this flag set.  Otherwise, we
9880              * could get segfaults, etc. */
9881             if (PL_lex_inpat && PL_error_count > save_error_count) {
9882                 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9883             }
9884             if (*s == '\\')
9885                 PL_lex_state = LEX_INTERPCASEMOD;
9886             else
9887                 PL_lex_state = LEX_INTERPSTART;
9888         }
9889
9890         if (s != PL_bufptr) {
9891             NEXTVAL_NEXTTOKE = pl_yylval;
9892             PL_expect = XTERM;
9893             force_next(THING);
9894             if (PL_lex_starts++) {
9895                 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9896                 if (!PL_lex_casemods && PL_lex_inpat)
9897                     TOKEN(PERLY_COMMA);
9898                 else
9899                     AopNOASSIGN(OP_CONCAT);
9900             }
9901             else {
9902                 PL_bufptr = s;
9903                 return yylex();
9904             }
9905         }
9906
9907         return yylex();
9908     case LEX_FORMLINE:
9909         if (PL_parser->sub_error_count != PL_error_count) {
9910             /* There was an error parsing a formline, which tends to
9911                mess up the parser.
9912                Unlike interpolated sub-parsing, we can't treat any of
9913                these as recoverable, so no need to check sub_no_recover.
9914             */
9915             yyquit();
9916         }
9917         assert(PL_lex_formbrack);
9918         s = scan_formline(PL_bufptr);
9919         if (!PL_lex_formbrack)
9920             return yyl_rightcurly(aTHX_ s, 1);
9921         PL_bufptr = s;
9922         return yylex();
9923     }
9924
9925     /* We really do *not* want PL_linestr ever becoming a COW. */
9926     assert (!SvIsCOW(PL_linestr));
9927     s = PL_bufptr;
9928     PL_oldoldbufptr = PL_oldbufptr;
9929     PL_oldbufptr = s;
9930
9931     if (PL_in_my == KEY_sigvar) {
9932         PL_parser->saw_infix_sigil = 0;
9933         return yyl_sigvar(aTHX_ s);
9934     }
9935
9936     {
9937         /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9938            On its return, we then need to set it to indicate whether the token
9939            we just encountered was an infix operator that (if we hadn't been
9940            expecting an operator) have been a sigil.
9941         */
9942         bool expected_operator = (PL_expect == XOPERATOR);
9943         int ret = yyl_try(aTHX_ s);
9944         switch (pl_yylval.ival) {
9945         case OP_BIT_AND:
9946         case OP_MODULO:
9947         case OP_MULTIPLY:
9948         case OP_NBIT_AND:
9949             if (expected_operator) {
9950                 PL_parser->saw_infix_sigil = 1;
9951                 break;
9952             }
9953             /* FALLTHROUGH */
9954         default:
9955             PL_parser->saw_infix_sigil = 0;
9956         }
9957         return ret;
9958     }
9959 }
9960
9961
9962 /*
9963   S_pending_ident
9964
9965   Looks up an identifier in the pad or in a package
9966
9967   PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9968   rather than a plain pad var.
9969
9970   Returns:
9971     PRIVATEREF if this is a lexical name.
9972     BAREWORD   if this belongs to a package.
9973
9974   Structure:
9975       if we're in a my declaration
9976           croak if they tried to say my($foo::bar)
9977           build the ops for a my() declaration
9978       if it's an access to a my() variable
9979           build ops for access to a my() variable
9980       if in a dq string, and they've said @foo and we can't find @foo
9981           warn
9982       build ops for a bareword
9983 */
9984
9985 static int
9986 S_pending_ident(pTHX)
9987 {
9988     PADOFFSET tmp = 0;
9989     const char pit = (char)pl_yylval.ival;
9990     const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9991     /* All routes through this function want to know if there is a colon.  */
9992     const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9993
9994     DEBUG_T({ PerlIO_printf(Perl_debug_log,
9995           "### Pending identifier '%s'\n", PL_tokenbuf); });
9996     assert(tokenbuf_len >= 2);
9997
9998     /* if we're in a my(), we can't allow dynamics here.
9999        $foo'bar has already been turned into $foo::bar, so
10000        just check for colons.
10001
10002        if it's a legal name, the OP is a PADANY.
10003     */
10004     if (PL_in_my) {
10005         if (PL_in_my == KEY_our) {      /* "our" is merely analogous to "my" */
10006             if (has_colon)
10007                 /* diag_listed_as: No package name allowed for variable %s
10008                                    in "our" */
10009                 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
10010                                   "%s %s in \"our\"",
10011                                   *PL_tokenbuf=='&' ? "subroutine" : "variable",
10012                                   PL_tokenbuf), UTF ? SVf_UTF8 : 0);
10013             tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
10014         }
10015         else {
10016             OP *o;
10017             if (has_colon) {
10018                 /* "my" variable %s can't be in a package */
10019                 /* PL_no_myglob is constant */
10020                 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
10021                 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
10022                             PL_in_my == KEY_my ? "my" :
10023                             PL_in_my == KEY_field ? "field" : "state",
10024                             *PL_tokenbuf == '&' ? "subroutine" : "variable",
10025                             PL_tokenbuf),
10026                             UTF ? SVf_UTF8 : 0);
10027                 GCC_DIAG_RESTORE_STMT;
10028             }
10029
10030             if (PL_in_my == KEY_sigvar) {
10031                 /* A signature 'padop' needs in addition, an op_first to
10032                  * point to a child sigdefelem, and an extra field to hold
10033                  * the signature index. We can achieve both by using an
10034                  * UNOP_AUX and (ab)using the op_aux field to hold the
10035                  * index. If we ever need more fields, use a real malloced
10036                  * aux strut instead.
10037                  */
10038                 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
10039                                     INT2PTR(UNOP_AUX_item *,
10040                                         (PL_parser->sig_elems)));
10041                 o->op_private |= (  PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
10042                                   : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
10043                                   :                         OPpARGELEM_HV);
10044             }
10045             else
10046                 o = newOP(OP_PADANY, 0);
10047             o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
10048                                                         UTF ? SVf_UTF8 : 0);
10049             if (PL_in_my == KEY_sigvar)
10050                 PL_in_my = 0;
10051
10052             pl_yylval.opval = o;
10053             return PRIVATEREF;
10054         }
10055     }
10056
10057     /*
10058        build the ops for accesses to a my() variable.
10059     */
10060
10061     if (!has_colon) {
10062         if (!PL_in_my)
10063             tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
10064                                  0);
10065         if (tmp != NOT_IN_PAD) {
10066             /* might be an "our" variable" */
10067             if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10068                 /* build ops for a bareword */
10069                 HV *  const stash = PAD_COMPNAME_OURSTASH(tmp);
10070                 HEK * const stashname = HvNAME_HEK(stash);
10071                 SV *  const sym = newSVhek(stashname);
10072                 sv_catpvs(sym, "::");
10073                 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
10074                 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
10075                 pl_yylval.opval->op_private = OPpCONST_ENTERED;
10076                 if (pit != '&')
10077                   gv_fetchsv(sym,
10078                     GV_ADDMULTI,
10079                     ((PL_tokenbuf[0] == '$') ? SVt_PV
10080                      : (PL_tokenbuf[0] == '@') ? SVt_PVAV
10081                      : SVt_PVHV));
10082                 return BAREWORD;
10083             }
10084
10085             pl_yylval.opval = newOP(OP_PADANY, 0);
10086             pl_yylval.opval->op_targ = tmp;
10087             return PRIVATEREF;
10088         }
10089     }
10090
10091     /*
10092        Whine if they've said @foo or @foo{key} in a doublequoted string,
10093        and @foo (or %foo) isn't a variable we can find in the symbol
10094        table.
10095     */
10096     if (ckWARN(WARN_AMBIGUOUS)
10097         && pit == '@'
10098         && PL_lex_state != LEX_NORMAL
10099         && !PL_lex_brackets)
10100     {
10101         GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
10102                                          ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
10103                                          SVt_PVAV);
10104         if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
10105            )
10106         {
10107             /* Downgraded from fatal to warning 20000522 mjd */
10108             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10109                         "Possible unintended interpolation of %" UTF8f
10110                         " in string",
10111                         UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
10112         }
10113     }
10114
10115     /* build ops for a bareword */
10116     pl_yylval.opval = newSVOP(OP_CONST, 0,
10117                                    newSVpvn_flags(PL_tokenbuf + 1,
10118                                                       tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
10119                                                       UTF ? SVf_UTF8 : 0 ));
10120     pl_yylval.opval->op_private = OPpCONST_ENTERED;
10121     if (pit != '&')
10122         gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
10123                      (PL_in_eval ? GV_ADDMULTI : GV_ADD)
10124                      | ( UTF ? SVf_UTF8 : 0 ),
10125                      ((PL_tokenbuf[0] == '$') ? SVt_PV
10126                       : (PL_tokenbuf[0] == '@') ? SVt_PVAV
10127                       : SVt_PVHV));
10128     return BAREWORD;
10129 }
10130
10131 STATIC void
10132 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
10133 {
10134     PERL_ARGS_ASSERT_CHECKCOMMA;
10135
10136     if (*s == ' ' && s[1] == '(') {     /* XXX gotta be a better way */
10137         if (ckWARN(WARN_SYNTAX)) {
10138             int level = 1;
10139             const char *w;
10140             for (w = s+2; *w && level; w++) {
10141                 if (*w == '(')
10142                     ++level;
10143                 else if (*w == ')')
10144                     --level;
10145             }
10146             while (isSPACE(*w))
10147                 ++w;
10148             /* the list of chars below is for end of statements or
10149              * block / parens, boolean operators (&&, ||, //) and branch
10150              * constructs (or, and, if, until, unless, while, err, for).
10151              * Not a very solid hack... */
10152             if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
10153                 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10154                             "%s (...) interpreted as function",name);
10155         }
10156     }
10157     while (s < PL_bufend && isSPACE(*s))
10158         s++;
10159     if (*s == '(')
10160         s++;
10161     while (s < PL_bufend && isSPACE(*s))
10162         s++;
10163     if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
10164         const char * const w = s;
10165         s += UTF ? UTF8SKIP(s) : 1;
10166         while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10167             s += UTF ? UTF8SKIP(s) : 1;
10168         while (s < PL_bufend && isSPACE(*s))
10169             s++;
10170         if (*s == ',') {
10171             GV* gv;
10172             if (keyword(w, s - w, 0))
10173                 return;
10174
10175             gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
10176             if (gv && GvCVu(gv))
10177                 return;
10178             if (s - w <= 254) {
10179                 PADOFFSET off;
10180                 char tmpbuf[256];
10181                 Copy(w, tmpbuf+1, s - w, char);
10182                 *tmpbuf = '&';
10183                 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
10184                 if (off != NOT_IN_PAD) return;
10185             }
10186             Perl_croak(aTHX_ "No comma allowed after %s", what);
10187         }
10188     }
10189 }
10190
10191 /* S_new_constant(): do any overload::constant lookup.
10192
10193    Either returns sv, or mortalizes/frees sv and returns a new SV*.
10194    Best used as sv=new_constant(..., sv, ...).
10195    If s, pv are NULL, calls subroutine with one argument,
10196    and <type> is used with error messages only.
10197    <type> is assumed to be well formed UTF-8.
10198
10199    If error_msg is not NULL, *error_msg will be set to any error encountered.
10200    Otherwise yyerror() will be used to output it */
10201
10202 STATIC SV *
10203 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
10204                SV *sv, SV *pv, const char *type, STRLEN typelen,
10205                const char ** error_msg)
10206 {
10207     dSP;
10208     HV * table = GvHV(PL_hintgv);                /* ^H */
10209     SV *res;
10210     SV *errsv = NULL;
10211     SV **cvp;
10212     SV *cv, *typesv;
10213     const char *why1 = "", *why2 = "", *why3 = "";
10214     const char * optional_colon = ":";  /* Only some messages have a colon */
10215     char *msg;
10216
10217     PERL_ARGS_ASSERT_NEW_CONSTANT;
10218     /* We assume that this is true: */
10219     assert(type || s);
10220
10221     sv_2mortal(sv);                     /* Parent created it permanently */
10222
10223     if (   ! table
10224         || ! (PL_hints & HINT_LOCALIZE_HH))
10225     {
10226         why1 = "unknown";
10227         optional_colon = "";
10228         goto report;
10229     }
10230
10231     cvp = hv_fetch(table, key, keylen, FALSE);
10232     if (!cvp || !SvOK(*cvp)) {
10233         why1 = "$^H{";
10234         why2 = key;
10235         why3 = "} is not defined";
10236         goto report;
10237     }
10238
10239     cv = *cvp;
10240     if (!pv && s)
10241         pv = newSVpvn_flags(s, len, SVs_TEMP);
10242     if (type && pv)
10243         typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
10244     else
10245         typesv = &PL_sv_undef;
10246
10247     PUSHSTACKi(PERLSI_OVERLOAD);
10248     ENTER ;
10249     SAVETMPS;
10250
10251     PUSHMARK(SP) ;
10252     EXTEND(sp, 3);
10253     if (pv)
10254         PUSHs(pv);
10255     PUSHs(sv);
10256     if (pv)
10257         PUSHs(typesv);
10258     PUTBACK;
10259     call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
10260
10261     SPAGAIN ;
10262
10263     /* Check the eval first */
10264     if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
10265         STRLEN errlen;
10266         const char * errstr;
10267         sv_catpvs(errsv, "Propagated");
10268         errstr = SvPV_const(errsv, errlen);
10269         yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
10270         (void)POPs;
10271         res = SvREFCNT_inc_simple_NN(sv);
10272     }
10273     else {
10274         res = POPs;
10275         SvREFCNT_inc_simple_void_NN(res);
10276     }
10277
10278     PUTBACK ;
10279     FREETMPS ;
10280     LEAVE ;
10281     POPSTACK;
10282
10283     if (SvOK(res)) {
10284         return res;
10285     }
10286
10287     sv = res;
10288     (void)sv_2mortal(sv);
10289
10290     why1 = "Call to &{$^H{";
10291     why2 = key;
10292     why3 = "}} did not return a defined value";
10293
10294   report:
10295
10296     msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
10297                         (int)(type ? typelen : len),
10298                         (type ? type: s),
10299                         optional_colon,
10300                         why1, why2, why3);
10301     if (error_msg) {
10302         *error_msg = msg;
10303     }
10304     else {
10305         yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
10306     }
10307     return SvREFCNT_inc_simple_NN(sv);
10308 }
10309
10310 PERL_STATIC_INLINE void
10311 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
10312                     bool is_utf8, bool check_dollar, bool tick_warn)
10313 {
10314     int saw_tick = 0;
10315     const char *olds = *s;
10316     PERL_ARGS_ASSERT_PARSE_IDENT;
10317
10318     while (*s < PL_bufend) {
10319         if (*d >= e)
10320             Perl_croak(aTHX_ "%s", ident_too_long);
10321         if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
10322              /* The UTF-8 case must come first, otherwise things
10323              * like c\N{COMBINING TILDE} would start failing, as the
10324              * isWORDCHAR_A case below would gobble the 'c' up.
10325              */
10326
10327             char *t = *s + UTF8SKIP(*s);
10328             while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10329                 t += UTF8SKIP(t);
10330             }
10331             if (*d + (t - *s) > e)
10332                 Perl_croak(aTHX_ "%s", ident_too_long);
10333             Copy(*s, *d, t - *s, char);
10334             *d += t - *s;
10335             *s = t;
10336         }
10337         else if ( isWORDCHAR_A(**s) ) {
10338             do {
10339                 *(*d)++ = *(*s)++;
10340             } while (isWORDCHAR_A(**s) && *d < e);
10341         }
10342         else if (   allow_package
10343                  && **s == '\''
10344                  && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10345         {
10346             *(*d)++ = ':';
10347             *(*d)++ = ':';
10348             (*s)++;
10349             saw_tick++;
10350         }
10351         else if (allow_package && **s == ':' && (*s)[1] == ':'
10352            /* Disallow things like Foo::$bar. For the curious, this is
10353             * the code path that triggers the "Bad name after" warning
10354             * when looking for barewords.
10355             */
10356            && !(check_dollar && (*s)[2] == '$')) {
10357             *(*d)++ = *(*s)++;
10358             *(*d)++ = *(*s)++;
10359         }
10360         else
10361             break;
10362     }
10363     if (UNLIKELY(saw_tick && tick_warn && ckWARN2_d(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR))) {
10364         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10365             char *this_d;
10366             char *d2;
10367             Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
10368             d2 = this_d;
10369             SAVEFREEPV(this_d);
10370
10371             Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
10372                         "Old package separator used in string");
10373             if (olds[-1] == '#')
10374                 *d2++ = olds[-2];
10375             *d2++ = olds[-1];
10376             while (olds < *s) {
10377                 if (*olds == '\'') {
10378                     *d2++ = '\\';
10379                     *d2++ = *olds++;
10380                 }
10381                 else
10382                     *d2++ = *olds++;
10383             }
10384             Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10385                         "\t(Did you mean \"%" UTF8f "\" instead?)\n",
10386                         UTF8fARG(is_utf8, d2-this_d, this_d));
10387         }
10388         else {
10389             Perl_warner(aTHX_ packWARN2(WARN_SYNTAX, WARN_DEPRECATED__APOSTROPHE_AS_PACKAGE_SEPARATOR),
10390                         "Old package separator \"'\" deprecated");
10391         }
10392     }
10393     return;
10394 }
10395
10396 /* Returns a NUL terminated string, with the length of the string written to
10397    *slp
10398
10399    scan_word6() may be removed once ' in names is removed.
10400    */
10401 char *
10402 Perl_scan_word6(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp, bool warn_tick)
10403 {
10404     char *d = dest;
10405     char * const e = d + destlen - 3;  /* two-character token, ending NUL */
10406     bool is_utf8 = cBOOL(UTF);
10407
10408     PERL_ARGS_ASSERT_SCAN_WORD6;
10409
10410     parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, warn_tick);
10411     *d = '\0';
10412     *slp = d - dest;
10413     return s;
10414 }
10415
10416 char *
10417 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10418 {
10419     PERL_ARGS_ASSERT_SCAN_WORD;
10420     return scan_word6(s, dest, destlen, allow_package, slp, FALSE);
10421 }
10422
10423 /* scan s and extract an identifier ($var) from it if possible
10424  * into dest.
10425  * XXX: This function has subtle implications on parsing, and
10426  * changing how it behaves can cause a variable to change from
10427  * being a run time rv2sv call or a compile time binding to a
10428  * specific variable name.
10429  */
10430 STATIC char *
10431 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10432 {
10433     I32 herelines = PL_parser->herelines;
10434     SSize_t bracket = -1;
10435     char funny = *s++;
10436     char *d = dest;
10437     char * const e = d + destlen - 3;    /* two-character token, ending NUL */
10438     bool is_utf8 = cBOOL(UTF);
10439     line_t orig_copline = 0, tmp_copline = 0;
10440
10441     PERL_ARGS_ASSERT_SCAN_IDENT;
10442
10443     if (isSPACE(*s) || !*s)
10444         s = skipspace(s);
10445     if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10446         bool is_zero= *s == '0' ? TRUE : FALSE;
10447         char *digit_start= d;
10448         *d++ = *s++;
10449         while (s < PL_bufend && isDIGIT(*s)) {
10450             if (d >= e)
10451                 Perl_croak(aTHX_ "%s", ident_too_long);
10452             *d++ = *s++;
10453         }
10454         if (is_zero && d - digit_start > 1)
10455             Perl_croak(aTHX_ ident_var_zero_multi_digit);
10456     }
10457     else {  /* See if it is a "normal" identifier */
10458         parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
10459     }
10460     *d = '\0';
10461     d = dest;
10462     if (*d) {
10463         /* Either a digit variable, or parse_ident() found an identifier
10464            (anything valid as a bareword), so job done and return.  */
10465         if (PL_lex_state != LEX_NORMAL)
10466             PL_lex_state = LEX_INTERPENDMAYBE;
10467         return s;
10468     }
10469
10470     /* Here, it is not a run-of-the-mill identifier name */
10471
10472     if (*s == '$' && s[1]
10473         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
10474             || isDIGIT_A((U8)s[1])
10475             || s[1] == '$'
10476             || s[1] == '{'
10477             || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
10478     {
10479         /* Dereferencing a value in a scalar variable.
10480            The alternatives are different syntaxes for a scalar variable.
10481            Using ' as a leading package separator isn't allowed. :: is.   */
10482         return s;
10483     }
10484     /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
10485     if (*s == '{') {
10486         bracket = s - SvPVX(PL_linestr);
10487         s++;
10488         orig_copline = CopLINE(PL_curcop);
10489         if (s < PL_bufend && isSPACE(*s)) {
10490             s = skipspace(s);
10491         }
10492     }
10493
10494
10495     /* Extract the first character of the variable name from 's' and
10496      * copy it, null terminated into 'd'. Note that this does not
10497      * involve checking for just IDFIRST characters, as it allows the
10498      * '^' for ${^FOO} type variable names, and it allows all the
10499      * characters that are legal in a single character variable name.
10500      *
10501      * The legal ones are any of:
10502      *  a) all ASCII characters except:
10503      *          1) control and space-type ones, like NUL, SOH, \t, and SPACE;
10504      *          2) '{'
10505      *     The final case currently doesn't get this far in the program, so we
10506      *     don't test for it.  If that were to change, it would be ok to allow it.
10507      *  b) When not under Unicode rules, any upper Latin1 character
10508      *  c) Otherwise, when unicode rules are used, all XIDS characters.
10509      *
10510      *      Because all ASCII characters have the same representation whether
10511      *      encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10512      *      '{' without knowing if is UTF-8 or not. */
10513
10514     if ((s <= PL_bufend - ((is_utf8)
10515                           ? UTF8SKIP(s)
10516                           : 1))
10517         && (
10518             isGRAPH_A(*s)
10519             ||
10520             ( is_utf8
10521               ? isIDFIRST_utf8_safe(s, PL_bufend)
10522               : (isGRAPH_L1(*s)
10523                  && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD))
10524                 )
10525             )
10526         )
10527     ){
10528         if (is_utf8) {
10529             const STRLEN skip = UTF8SKIP(s);
10530             STRLEN i;
10531             d[skip] = '\0';
10532             for ( i = 0; i < skip; i++ )
10533                 d[i] = *s++;
10534         }
10535         else {
10536             *d = *s++;
10537             d[1] = '\0';
10538         }
10539     }
10540
10541     /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10542     if (isDIGIT(*d)) {
10543         bool is_zero= *d == '0' ? TRUE : FALSE;
10544         char *digit_start= d;
10545         while (s < PL_bufend && isDIGIT(*s)) {
10546             d++;
10547             if (d >= e)
10548                 Perl_croak(aTHX_ "%s", ident_too_long);
10549             *d= *s++;
10550         }
10551         if (is_zero && d - digit_start >= 1) /* d points at the last digit */
10552             Perl_croak(aTHX_ ident_var_zero_multi_digit);
10553         d[1] = '\0';
10554     }
10555
10556     /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10557     else if (*d == '^' && *s && isCONTROLVAR(*s)) {
10558         *d = toCTRL(*s);
10559         s++;
10560     }
10561     /* Warn about ambiguous code after unary operators if {...} notation isn't
10562        used.  There's no difference in ambiguity; it's merely a heuristic
10563        about when not to warn.  */
10564     else if (ck_uni && bracket == -1)
10565         check_uni();
10566
10567     if (bracket != -1) {
10568         bool skip;
10569         char *s2;
10570         /* If we were processing {...} notation then...  */
10571         if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10572             || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10573                  && isWORDCHAR(*s))
10574         ) {
10575             /* note we have to check for a normal identifier first,
10576              * as it handles utf8 symbols, and only after that has
10577              * been ruled out can we look at the caret words */
10578             if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10579                 /* if it starts as a valid identifier, assume that it is one.
10580                    (the later check for } being at the expected point will trap
10581                    cases where this doesn't pan out.)  */
10582                 d += is_utf8 ? UTF8SKIP(d) : 1;
10583                 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10584                 *d = '\0';
10585             }
10586             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10587                 d++;
10588                 while (isWORDCHAR(*s) && d < e) {
10589                     *d++ = *s++;
10590                 }
10591                 if (d >= e)
10592                     Perl_croak(aTHX_ "%s", ident_too_long);
10593                 *d = '\0';
10594             }
10595             tmp_copline = CopLINE(PL_curcop);
10596             if (s < PL_bufend && isSPACE(*s)) {
10597                 s = skipspace(s);
10598             }
10599             if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10600                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
10601                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10602                     const char * const brack =
10603                         (const char *)
10604                         ((*s == '[') ? "[...]" : "{...}");
10605                     orig_copline = CopLINE(PL_curcop);
10606                     CopLINE_set(PL_curcop, tmp_copline);
10607    /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10608                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10609                         "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10610                         funny, dest, brack, funny, dest, brack);
10611                     CopLINE_set(PL_curcop, orig_copline);
10612                 }
10613                 bracket++;
10614                 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10615                 PL_lex_allbrackets++;
10616                 return s;
10617             }
10618         }
10619
10620         if ( !tmp_copline )
10621             tmp_copline = CopLINE(PL_curcop);
10622         if ((skip = s < PL_bufend && isSPACE(*s))) {
10623             /* Avoid incrementing line numbers or resetting PL_linestart,
10624                in case we have to back up.  */
10625             STRLEN s_off = s - SvPVX(PL_linestr);
10626             s2 = peekspace(s);
10627             s = SvPVX(PL_linestr) + s_off;
10628         }
10629         else
10630             s2 = s;
10631
10632         /* Expect to find a closing } after consuming any trailing whitespace.
10633          */
10634         if (*s2 == '}') {
10635             /* Now increment line numbers if applicable.  */
10636             if (skip)
10637                 s = skipspace(s);
10638             s++;
10639             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10640                 PL_lex_state = LEX_INTERPEND;
10641                 PL_expect = XREF;
10642             }
10643             if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10644                 if (ckWARN(WARN_AMBIGUOUS)
10645                     && (keyword(dest, d - dest, 0)
10646                         || get_cvn_flags(dest, d - dest, is_utf8
10647                            ? SVf_UTF8
10648                            : 0)))
10649                 {
10650                     SV *tmp = newSVpvn_flags( dest, d - dest,
10651                                         SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10652                     if (funny == '#')
10653                         funny = '@';
10654                     orig_copline = CopLINE(PL_curcop);
10655                     CopLINE_set(PL_curcop, tmp_copline);
10656                     Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10657                         "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10658                         funny, SVfARG(tmp), funny, SVfARG(tmp));
10659                     CopLINE_set(PL_curcop, orig_copline);
10660                 }
10661             }
10662         }
10663         else {
10664             /* Didn't find the closing } at the point we expected, so restore
10665                state such that the next thing to process is the opening { and */
10666             s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10667             CopLINE_set(PL_curcop, orig_copline);
10668             PL_parser->herelines = herelines;
10669             *dest = '\0';
10670             PL_parser->sub_no_recover = TRUE;
10671         }
10672     }
10673     else if (   PL_lex_state == LEX_INTERPNORMAL
10674              && !PL_lex_brackets
10675              && !intuit_more(s, PL_bufend))
10676         PL_lex_state = LEX_INTERPEND;
10677     return s;
10678 }
10679
10680 static bool
10681 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10682
10683     /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10684      * found in the parse starting at 's', based on the subset that are valid
10685      * in this context input to this routine in 'valid_flags'. Advances s.
10686      * Returns TRUE if the input should be treated as a valid flag, so the next
10687      * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10688      * upon first call on the current regex.  This routine will set it to any
10689      * charset modifier found.  The caller shouldn't change it.  This way,
10690      * another charset modifier encountered in the parse can be detected as an
10691      * error, as we have decided to allow only one */
10692
10693     const char c = **s;
10694     STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10695
10696     if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10697         if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10698             yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10699                        UTF ? SVf_UTF8 : 0);
10700             (*s) += charlen;
10701             /* Pretend that it worked, so will continue processing before
10702              * dieing */
10703             return TRUE;
10704         }
10705         return FALSE;
10706     }
10707
10708     switch (c) {
10709
10710         CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10711         case GLOBAL_PAT_MOD:      *pmfl |= PMf_GLOBAL; break;
10712         case CONTINUE_PAT_MOD:    *pmfl |= PMf_CONTINUE; break;
10713         case ONCE_PAT_MOD:        *pmfl |= PMf_KEEP; break;
10714         case KEEPCOPY_PAT_MOD:    *pmfl |= RXf_PMf_KEEPCOPY; break;
10715         case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10716         case LOCALE_PAT_MOD:
10717             if (*charset) {
10718                 goto multiple_charsets;
10719             }
10720             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10721             *charset = c;
10722             break;
10723         case UNICODE_PAT_MOD:
10724             if (*charset) {
10725                 goto multiple_charsets;
10726             }
10727             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10728             *charset = c;
10729             break;
10730         case ASCII_RESTRICT_PAT_MOD:
10731             if (! *charset) {
10732                 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10733             }
10734             else {
10735
10736                 /* Error if previous modifier wasn't an 'a', but if it was, see
10737                  * if, and accept, a second occurrence (only) */
10738                 if (*charset != 'a'
10739                     || get_regex_charset(*pmfl)
10740                         != REGEX_ASCII_RESTRICTED_CHARSET)
10741                 {
10742                         goto multiple_charsets;
10743                 }
10744                 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10745             }
10746             *charset = c;
10747             break;
10748         case DEPENDS_PAT_MOD:
10749             if (*charset) {
10750                 goto multiple_charsets;
10751             }
10752             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10753             *charset = c;
10754             break;
10755     }
10756
10757     (*s)++;
10758     return TRUE;
10759
10760     multiple_charsets:
10761         if (*charset != c) {
10762             yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10763         }
10764         else if (c == 'a') {
10765   /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10766             yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10767         }
10768         else {
10769             yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10770         }
10771
10772         /* Pretend that it worked, so will continue processing before dieing */
10773         (*s)++;
10774         return TRUE;
10775 }
10776
10777 STATIC char *
10778 S_scan_pat(pTHX_ char *start, I32 type)
10779 {
10780     PMOP *pm;
10781     char *s;
10782     const char * const valid_flags =
10783         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10784     char charset = '\0';    /* character set modifier */
10785     unsigned int x_mod_count = 0;
10786
10787     PERL_ARGS_ASSERT_SCAN_PAT;
10788
10789     s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10790     if (!s)
10791         Perl_croak(aTHX_ "Search pattern not terminated");
10792
10793     pm = (PMOP*)newPMOP(type, 0);
10794     if (PL_multi_open == '?') {
10795         /* This is the only point in the code that sets PMf_ONCE:  */
10796         pm->op_pmflags |= PMf_ONCE;
10797
10798         /* Hence it's safe to do this bit of PMOP book-keeping here, which
10799            allows us to restrict the list needed by reset to just the ??
10800            matches.  */
10801         assert(type != OP_TRANS);
10802         if (PL_curstash) {
10803             MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10804             U32 elements;
10805             if (!mg) {
10806                 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10807                                  0);
10808             }
10809             elements = mg->mg_len / sizeof(PMOP**);
10810             Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10811             ((PMOP**)mg->mg_ptr) [elements++] = pm;
10812             mg->mg_len = elements * sizeof(PMOP**);
10813             PmopSTASH_set(pm,PL_curstash);
10814         }
10815     }
10816
10817     /* if qr/...(?{..}).../, then need to parse the pattern within a new
10818      * anon CV. False positives like qr/[(?{]/ are harmless */
10819
10820     if (type == OP_QR) {
10821         STRLEN len;
10822         char *e, *p = SvPV(PL_lex_stuff, len);
10823         e = p + len;
10824         for (; p < e; p++) {
10825             if (p[0] == '(' && (
10826                 (p[1] == '?' && (p[2] == '{' ||
10827                                 (p[2] == '?' && p[3] == '{'))) ||
10828                 (p[1] == '*' && (p[2] == '{' ||
10829                                 (p[2] == '*' && p[3] == '{')))
10830             )){
10831                 pm->op_pmflags |= PMf_HAS_CV;
10832                 break;
10833             }
10834         }
10835         pm->op_pmflags |= PMf_IS_QR;
10836     }
10837
10838     while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10839                                 &s, &charset, &x_mod_count))
10840     {};
10841     /* issue a warning if /c is specified,but /g is not */
10842     if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10843     {
10844         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10845                        "Use of /c modifier is meaningless without /g" );
10846     }
10847
10848     PL_lex_op = (OP*)pm;
10849     pl_yylval.ival = OP_MATCH;
10850     return s;
10851 }
10852
10853 STATIC char *
10854 S_scan_subst(pTHX_ char *start)
10855 {
10856     char *s;
10857     PMOP *pm;
10858     I32 first_start;
10859     line_t first_line;
10860     line_t linediff = 0;
10861     I32 es = 0;
10862     char charset = '\0';    /* character set modifier */
10863     unsigned int x_mod_count = 0;
10864     char *t;
10865
10866     PERL_ARGS_ASSERT_SCAN_SUBST;
10867
10868     pl_yylval.ival = OP_NULL;
10869
10870     s = scan_str(start, TRUE, FALSE, FALSE, &t);
10871
10872     if (!s)
10873         Perl_croak(aTHX_ "Substitution pattern not terminated");
10874
10875     s = t;
10876
10877     first_start = PL_multi_start;
10878     first_line = CopLINE(PL_curcop);
10879     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10880     if (!s) {
10881         SvREFCNT_dec_NN(PL_lex_stuff);
10882         PL_lex_stuff = NULL;
10883         Perl_croak(aTHX_ "Substitution replacement not terminated");
10884     }
10885     PL_multi_start = first_start;       /* so whole substitution is taken together */
10886
10887     pm = (PMOP*)newPMOP(OP_SUBST, 0);
10888
10889
10890     while (*s) {
10891         if (*s == EXEC_PAT_MOD) {
10892             s++;
10893             es++;
10894         }
10895         else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10896                                   &s, &charset, &x_mod_count))
10897         {
10898             break;
10899         }
10900     }
10901
10902     if ((pm->op_pmflags & PMf_CONTINUE)) {
10903         Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10904     }
10905
10906     if (es) {
10907         SV * const repl = newSVpvs("");
10908
10909         PL_multi_end = 0;
10910         pm->op_pmflags |= PMf_EVAL;
10911         for (; es > 1; es--) {
10912             sv_catpvs(repl, "eval ");
10913         }
10914         sv_catpvs(repl, "do {");
10915         sv_catsv(repl, PL_parser->lex_sub_repl);
10916         sv_catpvs(repl, "}");
10917         SvREFCNT_dec(PL_parser->lex_sub_repl);
10918         PL_parser->lex_sub_repl = repl;
10919     }
10920
10921
10922     linediff = CopLINE(PL_curcop) - first_line;
10923     if (linediff)
10924         CopLINE_set(PL_curcop, first_line);
10925
10926     if (linediff || es) {
10927         /* the IVX field indicates that the replacement string is a s///e;
10928          * the NVX field indicates how many src code lines the replacement
10929          * spreads over */
10930         sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10931         ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10932         ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10933                                                                     cBOOL(es);
10934     }
10935
10936     PL_lex_op = (OP*)pm;
10937     pl_yylval.ival = OP_SUBST;
10938     return s;
10939 }
10940
10941 STATIC char *
10942 S_scan_trans(pTHX_ char *start)
10943 {
10944     char* s;
10945     OP *o;
10946     U8 squash;
10947     U8 del;
10948     U8 complement;
10949     bool nondestruct = 0;
10950     char *t;
10951
10952     PERL_ARGS_ASSERT_SCAN_TRANS;
10953
10954     pl_yylval.ival = OP_NULL;
10955
10956     s = scan_str(start,FALSE,FALSE,FALSE,&t);
10957     if (!s)
10958         Perl_croak(aTHX_ "Transliteration pattern not terminated");
10959
10960     s = t;
10961
10962     s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10963     if (!s) {
10964         SvREFCNT_dec_NN(PL_lex_stuff);
10965         PL_lex_stuff = NULL;
10966         Perl_croak(aTHX_ "Transliteration replacement not terminated");
10967     }
10968
10969     complement = del = squash = 0;
10970     while (1) {
10971         switch (*s) {
10972         case 'c':
10973             complement = OPpTRANS_COMPLEMENT;
10974             break;
10975         case 'd':
10976             del = OPpTRANS_DELETE;
10977             break;
10978         case 's':
10979             squash = OPpTRANS_SQUASH;
10980             break;
10981         case 'r':
10982             nondestruct = 1;
10983             break;
10984         default:
10985             goto no_more;
10986         }
10987         s++;
10988     }
10989   no_more:
10990
10991     o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10992     o->op_private &= ~OPpTRANS_ALL;
10993     o->op_private |= del|squash|complement;
10994
10995     PL_lex_op = o;
10996     pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10997
10998
10999     return s;
11000 }
11001
11002 /* scan_heredoc
11003    Takes a pointer to the first < in <<FOO.
11004    Returns a pointer to the byte following <<FOO.
11005
11006    This function scans a heredoc, which involves different methods
11007    depending on whether we are in a string eval, quoted construct, etc.
11008    This is because PL_linestr could containing a single line of input, or
11009    a whole string being evalled, or the contents of the current quote-
11010    like operator.
11011
11012    The two basic methods are:
11013     - Steal lines from the input stream
11014     - Scan the heredoc in PL_linestr and remove it therefrom
11015
11016    In a file scope or filtered eval, the first method is used; in a
11017    string eval, the second.
11018
11019    In a quote-like operator, we have to choose between the two,
11020    depending on where we can find a newline.  We peek into outer lex-
11021    ing scopes until we find one with a newline in it.  If we reach the
11022    outermost lexing scope and it is a file, we use the stream method.
11023    Otherwise it is treated as an eval.
11024 */
11025
11026 STATIC char *
11027 S_scan_heredoc(pTHX_ char *s)
11028 {
11029     I32 op_type = OP_SCALAR;
11030     I32 len;
11031     SV *tmpstr;
11032     char term;
11033     char *d;
11034     char *e;
11035     char *peek;
11036     char *indent = 0;
11037     I32 indent_len = 0;
11038     bool indented = FALSE;
11039     const bool infile = PL_rsfp || PL_parser->filtered;
11040     const line_t origline = CopLINE(PL_curcop);
11041     LEXSHARED *shared = PL_parser->lex_shared;
11042
11043     PERL_ARGS_ASSERT_SCAN_HEREDOC;
11044
11045     s += 2;
11046     d = PL_tokenbuf + 1;
11047     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
11048     *PL_tokenbuf = '\n';
11049     peek = s;
11050
11051     if (*peek == '~') {
11052         indented = TRUE;
11053         peek++; s++;
11054     }
11055
11056     while (SPACE_OR_TAB(*peek))
11057         peek++;
11058
11059     if (*peek == '`' || *peek == '\'' || *peek =='"') {
11060         s = peek;
11061         term = *s++;
11062         s = delimcpy(d, e, s, PL_bufend, term, &len);
11063         if (s == PL_bufend)
11064             Perl_croak(aTHX_ "Unterminated delimiter for here document");
11065         d += len;
11066         s++;
11067     }
11068     else {
11069         if (*s == '\\')
11070             /* <<\FOO is equivalent to <<'FOO' */
11071             s++, term = '\'';
11072         else
11073             term = '"';
11074
11075         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
11076             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
11077
11078         peek = s;
11079
11080         while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
11081             peek += UTF ? UTF8SKIP(peek) : 1;
11082         }
11083
11084         len = (peek - s >= e - d) ? (e - d) : (peek - s);
11085         Copy(s, d, len, char);
11086         s += len;
11087         d += len;
11088     }
11089
11090     if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
11091         Perl_croak(aTHX_ "Delimiter for here document is too long");
11092
11093     *d++ = '\n';
11094     *d = '\0';
11095     len = d - PL_tokenbuf;
11096
11097 #ifndef PERL_STRICT_CR
11098     d = (char *) memchr(s, '\r', PL_bufend - s);
11099     if (d) {
11100         char * const olds = s;
11101         s = d;
11102         while (s < PL_bufend) {
11103             if (*s == '\r') {
11104                 *d++ = '\n';
11105                 if (*++s == '\n')
11106                     s++;
11107             }
11108             else if (*s == '\n' && s[1] == '\r') {      /* \015\013 on a mac? */
11109                 *d++ = *s++;
11110                 s++;
11111             }
11112             else
11113                 *d++ = *s++;
11114         }
11115         *d = '\0';
11116         PL_bufend = d;
11117         SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11118         s = olds;
11119     }
11120 #endif
11121
11122     tmpstr = newSV_type(SVt_PVIV);
11123     if (term == '\'') {
11124         op_type = OP_CONST;
11125         SvIV_set(tmpstr, -1);
11126     }
11127     else if (term == '`') {
11128         op_type = OP_BACKTICK;
11129         SvIV_set(tmpstr, '\\');
11130     }
11131
11132     PL_multi_start = origline + 1 + PL_parser->herelines;
11133     PL_multi_open = PL_multi_close = '<';
11134
11135     /* inside a string eval or quote-like operator */
11136     if (!infile || PL_lex_inwhat) {
11137         SV *linestr;
11138         char *bufend;
11139         char * const olds = s;
11140         PERL_CONTEXT * const cx = CX_CUR();
11141         /* These two fields are not set until an inner lexing scope is
11142            entered.  But we need them set here. */
11143         shared->ls_bufptr  = s;
11144         shared->ls_linestr = PL_linestr;
11145
11146         if (PL_lex_inwhat) {
11147             /* Look for a newline.  If the current buffer does not have one,
11148              peek into the line buffer of the parent lexing scope, going
11149              up as many levels as necessary to find one with a newline
11150              after bufptr.
11151             */
11152             while (!(s = (char *)memchr(
11153                                 (void *)shared->ls_bufptr, '\n',
11154                                 SvEND(shared->ls_linestr)-shared->ls_bufptr
11155                 )))
11156             {
11157                 shared = shared->ls_prev;
11158                 /* shared is only null if we have gone beyond the outermost
11159                    lexing scope.  In a file, we will have broken out of the
11160                    loop in the previous iteration.  In an eval, the string buf-
11161                    fer ends with "\n;", so the while condition above will have
11162                    evaluated to false.  So shared can never be null.  Or so you
11163                    might think.  Odd syntax errors like s;@{<<; can gobble up
11164                    the implicit semicolon at the end of a flie, causing the
11165                    file handle to be closed even when we are not in a string
11166                    eval.  So shared may be null in that case.
11167                    (Closing '>>}' here to balance the earlier open brace for
11168                    editors that look for matched pairs.) */
11169                 if (UNLIKELY(!shared))
11170                     goto interminable;
11171                 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
11172                    most lexing scope.  In a file, shared->ls_linestr at that
11173                    level is just one line, so there is no body to steal. */
11174                 if (infile && !shared->ls_prev) {
11175                     s = olds;
11176                     goto streaming;
11177                 }
11178             }
11179         }
11180         else {  /* eval or we've already hit EOF */
11181             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
11182             if (!s)
11183                 goto interminable;
11184         }
11185
11186         linestr = shared->ls_linestr;
11187         bufend = SvEND(linestr);
11188         d = s;
11189         if (indented) {
11190             char *myolds = s;
11191
11192             while (s < bufend - len + 1) {
11193                 if (*s++ == '\n')
11194                     ++PL_parser->herelines;
11195
11196                 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
11197                     char *backup = s;
11198                     indent_len = 0;
11199
11200                     /* Only valid if it's preceded by whitespace only */
11201                     while (backup != myolds && --backup >= myolds) {
11202                         if (! SPACE_OR_TAB(*backup)) {
11203                             break;
11204                         }
11205                         indent_len++;
11206                     }
11207
11208                     /* No whitespace or all! */
11209                     if (backup == s || *backup == '\n') {
11210                         Newx(indent, indent_len + 1, char);
11211                         memcpy(indent, backup + 1, indent_len);
11212                         indent[indent_len] = 0;
11213                         s--; /* before our delimiter */
11214                         PL_parser->herelines--; /* this line doesn't count */
11215                         break;
11216                     }
11217                 }
11218             }
11219         }
11220         else {
11221             while (s < bufend - len + 1
11222                    && memNE(s,PL_tokenbuf,len) )
11223             {
11224                 if (*s++ == '\n')
11225                     ++PL_parser->herelines;
11226             }
11227         }
11228
11229         if (s >= bufend - len + 1) {
11230             goto interminable;
11231         }
11232
11233         sv_setpvn_fresh(tmpstr,d+1,s-d);
11234         s += len - 1;
11235         /* the preceding stmt passes a newline */
11236         PL_parser->herelines++;
11237
11238         /* s now points to the newline after the heredoc terminator.
11239            d points to the newline before the body of the heredoc.
11240          */
11241
11242         /* We are going to modify linestr in place here, so set
11243            aside copies of the string if necessary for re-evals or
11244            (caller $n)[6]. */
11245         /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
11246            check shared->re_eval_str. */
11247         if (shared->re_eval_start || shared->re_eval_str) {
11248             /* Set aside the rest of the regexp */
11249             if (!shared->re_eval_str)
11250                 shared->re_eval_str =
11251                        newSVpvn(shared->re_eval_start,
11252                                 bufend - shared->re_eval_start);
11253             shared->re_eval_start -= s-d;
11254         }
11255
11256         if (cxstack_ix >= 0
11257             && CxTYPE(cx) == CXt_EVAL
11258             && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
11259             && cx->blk_eval.cur_text == linestr)
11260         {
11261             cx->blk_eval.cur_text = newSVsv(linestr);
11262             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
11263         }
11264
11265         /* Copy everything from s onwards back to d. */
11266         Move(s,d,bufend-s + 1,char);
11267         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
11268         /* Setting PL_bufend only applies when we have not dug deeper
11269            into other scopes, because sublex_done sets PL_bufend to
11270            SvEND(PL_linestr). */
11271         if (shared == PL_parser->lex_shared)
11272             PL_bufend = SvEND(linestr);
11273         s = olds;
11274     }
11275     else {
11276         SV *linestr_save;
11277         char *oldbufptr_save;
11278         char *oldoldbufptr_save;
11279       streaming:
11280         sv_grow_fresh(tmpstr, 80);
11281         SvPVCLEAR_FRESH(tmpstr);   /* avoid "uninitialized" warning */
11282         term = PL_tokenbuf[1];
11283         len--;
11284         linestr_save = PL_linestr; /* must restore this afterwards */
11285         d = s;                   /* and this */
11286         oldbufptr_save = PL_oldbufptr;
11287         oldoldbufptr_save = PL_oldoldbufptr;
11288         PL_linestr = newSVpvs("");
11289         PL_bufend = SvPVX(PL_linestr);
11290
11291         while (1) {
11292             PL_bufptr = PL_bufend;
11293             CopLINE_set(PL_curcop,
11294                         origline + 1 + PL_parser->herelines);
11295
11296             if (   !lex_next_chunk(LEX_NO_TERM)
11297                 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
11298             {
11299                 /* Simply freeing linestr_save might seem simpler here, as it
11300                    does not matter what PL_linestr points to, since we are
11301                    about to croak; but in a quote-like op, linestr_save
11302                    will have been prospectively freed already, via
11303                    SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
11304                    restore PL_linestr. */
11305                 SvREFCNT_dec_NN(PL_linestr);
11306                 PL_linestr = linestr_save;
11307                 PL_oldbufptr = oldbufptr_save;
11308                 PL_oldoldbufptr = oldoldbufptr_save;
11309                 goto interminable;
11310             }
11311
11312             CopLINE_set(PL_curcop, origline);
11313
11314             if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
11315                 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
11316                 /* ^That should be enough to avoid this needing to grow:  */
11317                 sv_catpvs(PL_linestr, "\n\0");
11318                 assert(s == SvPVX(PL_linestr));
11319                 PL_bufend = SvEND(PL_linestr);
11320             }
11321
11322             s = PL_bufptr;
11323             PL_parser->herelines++;
11324             PL_last_lop = PL_last_uni = NULL;
11325
11326 #ifndef PERL_STRICT_CR
11327             if (PL_bufend - PL_linestart >= 2) {
11328                 if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
11329                     || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
11330                 {
11331                     PL_bufend[-2] = '\n';
11332                     PL_bufend--;
11333                     SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
11334                 }
11335                 else if (PL_bufend[-1] == '\r')
11336                     PL_bufend[-1] = '\n';
11337             }
11338             else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
11339                 PL_bufend[-1] = '\n';
11340 #endif
11341
11342             if (indented && (PL_bufend-s) >= len) {
11343                 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
11344
11345                 if (found) {
11346                     char *backup = found;
11347                     indent_len = 0;
11348
11349                     /* Only valid if it's preceded by whitespace only */
11350                     while (backup != s && --backup >= s) {
11351                         if (! SPACE_OR_TAB(*backup)) {
11352                             break;
11353                         }
11354                         indent_len++;
11355                     }
11356
11357                     /* All whitespace or none! */
11358                     if (backup == found || SPACE_OR_TAB(*backup)) {
11359                         Newx(indent, indent_len + 1, char);
11360                         memcpy(indent, backup, indent_len);
11361                         indent[indent_len] = 0;
11362                         SvREFCNT_dec(PL_linestr);
11363                         PL_linestr = linestr_save;
11364                         PL_linestart = SvPVX(linestr_save);
11365                         PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11366                         PL_oldbufptr = oldbufptr_save;
11367                         PL_oldoldbufptr = oldoldbufptr_save;
11368                         s = d;
11369                         break;
11370                     }
11371                 }
11372
11373                 /* Didn't find it */
11374                 sv_catsv(tmpstr,PL_linestr);
11375             }
11376             else {
11377                 if (*s == term && PL_bufend-s >= len
11378                     && memEQ(s,PL_tokenbuf + 1,len))
11379                 {
11380                     SvREFCNT_dec(PL_linestr);
11381                     PL_linestr = linestr_save;
11382                     PL_linestart = SvPVX(linestr_save);
11383                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11384                     PL_oldbufptr = oldbufptr_save;
11385                     PL_oldoldbufptr = oldoldbufptr_save;
11386                     s = d;
11387                     break;
11388                 }
11389                 else {
11390                     sv_catsv(tmpstr,PL_linestr);
11391                 }
11392             }
11393         } /* while (1) */
11394     }
11395
11396     PL_multi_end = origline + PL_parser->herelines;
11397
11398     if (indented && indent) {
11399         STRLEN linecount = 1;
11400         STRLEN herelen = SvCUR(tmpstr);
11401         char *ss = SvPVX(tmpstr);
11402         char *se = ss + herelen;
11403         SV *newstr = newSV(herelen+1);
11404         SvPOK_on(newstr);
11405
11406         /* Trim leading whitespace */
11407         while (ss < se) {
11408             /* newline only? Copy and move on */
11409             if (*ss == '\n') {
11410                 sv_catpvs(newstr,"\n");
11411                 ss++;
11412                 linecount++;
11413
11414             /* Found our indentation? Strip it */
11415             }
11416             else if (se - ss >= indent_len
11417                        && memEQ(ss, indent, indent_len))
11418             {
11419                 STRLEN le = 0;
11420                 ss += indent_len;
11421
11422                 while ((ss + le) < se && *(ss + le) != '\n')
11423                     le++;
11424
11425                 sv_catpvn(newstr, ss, le);
11426                 ss += le;
11427
11428             /* Line doesn't begin with our indentation? Croak */
11429             }
11430             else {
11431                 Safefree(indent);
11432                 Perl_croak(aTHX_
11433                     "Indentation on line %d of here-doc doesn't match delimiter",
11434                     (int)linecount
11435                 );
11436             }
11437         } /* while */
11438
11439         /* avoid sv_setsv() as we don't want to COW here */
11440         sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
11441         Safefree(indent);
11442         SvREFCNT_dec_NN(newstr);
11443     }
11444
11445     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11446         SvPV_shrink_to_cur(tmpstr);
11447     }
11448
11449     if (!IN_BYTES) {
11450         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11451             SvUTF8_on(tmpstr);
11452     }
11453
11454     PL_lex_stuff = tmpstr;
11455     pl_yylval.ival = op_type;
11456     return s;
11457
11458   interminable:
11459     if (indent)
11460         Safefree(indent);
11461     SvREFCNT_dec(tmpstr);
11462     CopLINE_set(PL_curcop, origline);
11463     missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
11464 }
11465
11466
11467 /* scan_inputsymbol
11468    takes: position of first '<' in input buffer
11469    returns: position of first char following the matching '>' in
11470             input buffer
11471    side-effects: pl_yylval and lex_op are set.
11472
11473    This code handles:
11474
11475    <>           read from ARGV
11476    <<>>         read from ARGV without magic open
11477    <FH>         read from filehandle
11478    <pkg::FH>    read from package qualified filehandle
11479    <pkg'FH>     read from package qualified filehandle
11480    <$fh>        read from filehandle in $fh
11481    <*.h>        filename glob
11482
11483 */
11484
11485 STATIC char *
11486 S_scan_inputsymbol(pTHX_ char *start)
11487 {
11488     char *s = start;            /* current position in buffer */
11489     char *end;
11490     I32 len;
11491     bool nomagicopen = FALSE;
11492     char *d = PL_tokenbuf;                                      /* start of temp holding space */
11493     const char * const e = PL_tokenbuf + sizeof PL_tokenbuf;    /* end of temp holding space */
11494
11495     PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11496
11497     end = (char *) memchr(s, '\n', PL_bufend - s);
11498     if (!end)
11499         end = PL_bufend;
11500     if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
11501         nomagicopen = TRUE;
11502         *d = '\0';
11503         len = 0;
11504         s += 3;
11505     }
11506     else
11507         s = delimcpy(d, e, s + 1, end, '>', &len);      /* extract until > */
11508
11509     /* die if we didn't have space for the contents of the <>,
11510        or if it didn't end, or if we see a newline
11511     */
11512
11513     if (len >= (I32)sizeof PL_tokenbuf)
11514         Perl_croak(aTHX_ "Excessively long <> operator");
11515     if (s >= end)
11516         Perl_croak(aTHX_ "Unterminated <> operator");
11517
11518     s++;
11519
11520     /* check for <$fh>
11521        Remember, only scalar variables are interpreted as filehandles by
11522        this code.  Anything more complex (e.g., <$fh{$num}>) will be
11523        treated as a glob() call.
11524        This code makes use of the fact that except for the $ at the front,
11525        a scalar variable and a filehandle look the same.
11526     */
11527     if (*d == '$' && d[1]) d++;
11528
11529     /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11530     while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11531         d += UTF ? UTF8SKIP(d) : 1;
11532     }
11533
11534     /* If we've tried to read what we allow filehandles to look like, and
11535        there's still text left, then it must be a glob() and not a getline.
11536        Use scan_str to pull out the stuff between the <> and treat it
11537        as nothing more than a string.
11538     */
11539
11540     if (d - PL_tokenbuf != len) {
11541         pl_yylval.ival = OP_GLOB;
11542         s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11543         if (!s)
11544            Perl_croak(aTHX_ "Glob not terminated");
11545         return s;
11546     }
11547     else {
11548         bool readline_overridden = FALSE;
11549         GV *gv_readline;
11550         /* we're in a filehandle read situation */
11551         d = PL_tokenbuf;
11552
11553         /* turn <> into <ARGV> */
11554         if (!len)
11555             Copy("ARGV",d,5,char);
11556
11557         /* Check whether readline() is overridden */
11558         if ((gv_readline = gv_override("readline",8)))
11559             readline_overridden = TRUE;
11560
11561         /* if <$fh>, create the ops to turn the variable into a
11562            filehandle
11563         */
11564         if (*d == '$') {
11565             /* try to find it in the pad for this block, otherwise find
11566                add symbol table ops
11567             */
11568             const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11569             if (tmp != NOT_IN_PAD) {
11570                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11571                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11572                     HEK * const stashname = HvNAME_HEK(stash);
11573                     SV * const sym = newSVhek_mortal(stashname);
11574                     sv_catpvs(sym, "::");
11575                     sv_catpv(sym, d+1);
11576                     d = SvPVX(sym);
11577                     goto intro_sym;
11578                 }
11579                 else {
11580                     OP * const o = newPADxVOP(OP_PADSV, 0, tmp);
11581                     PL_lex_op = readline_overridden
11582                         ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11583                                 op_append_elem(OP_LIST, o,
11584                                     newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11585                         : newUNOP(OP_READLINE, 0, o);
11586                 }
11587             }
11588             else {
11589                 GV *gv;
11590                 ++d;
11591               intro_sym:
11592                 gv = gv_fetchpv(d,
11593                                 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11594                                 SVt_PV);
11595                 PL_lex_op = readline_overridden
11596                     ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11597                             op_append_elem(OP_LIST,
11598                                 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11599                                 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11600                     : newUNOP(OP_READLINE, 0,
11601                             newUNOP(OP_RV2SV, 0,
11602                                 newGVOP(OP_GV, 0, gv)));
11603             }
11604             /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11605             pl_yylval.ival = OP_NULL;
11606         }
11607
11608         /* If it's none of the above, it must be a literal filehandle
11609            (<Foo::BAR> or <FOO>) so build a simple readline OP */
11610         else {
11611             GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11612             PL_lex_op = readline_overridden
11613                 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11614                         op_append_elem(OP_LIST,
11615                             newGVOP(OP_GV, 0, gv),
11616                             newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11617                 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11618             pl_yylval.ival = OP_NULL;
11619
11620             /* leave the token generation above to avoid confusing the parser */
11621             if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
11622                 no_bareword_filehandle(d);
11623             }
11624         }
11625     }
11626
11627     return s;
11628 }
11629
11630
11631 /* scan_str
11632    takes:
11633         start                   position in buffer
11634         keep_bracketed_quoted   preserve \ quoting of embedded delimiters, but
11635                                 only if they are of the open/close form
11636         keep_delims             preserve the delimiters around the string
11637         re_reparse              compiling a run-time /(?{})/:
11638                                    collapse // to /,  and skip encoding src
11639         delimp                  if non-null, this is set to the position of
11640                                 the closing delimiter, or just after it if
11641                                 the closing and opening delimiters differ
11642                                 (i.e., the opening delimiter of a substitu-
11643                                 tion replacement)
11644    returns: position to continue reading from buffer
11645    side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11646         updates the read buffer.
11647
11648    This subroutine pulls a string out of the input.  It is called for:
11649         q               single quotes           q(literal text)
11650         '               single quotes           'literal text'
11651         qq              double quotes           qq(interpolate $here please)
11652         "               double quotes           "interpolate $here please"
11653         qx              backticks               qx(/bin/ls -l)
11654         `               backticks               `/bin/ls -l`
11655         qw              quote words             @EXPORT_OK = qw( func() $spam )
11656         m//             regexp match            m/this/
11657         s///            regexp substitute       s/this/that/
11658         tr///           string transliterate    tr/this/that/
11659         y///            string transliterate    y/this/that/
11660         ($*@)           sub prototypes          sub foo ($)
11661         (stuff)         sub attr parameters     sub foo : attr(stuff)
11662         <>              readline or globs       <FOO>, <>, <$fh>, or <*.c>
11663
11664    In most of these cases (all but <>, patterns and transliterate)
11665    yylex() calls scan_str().  m// makes yylex() call scan_pat() which
11666    calls scan_str().  s/// makes yylex() call scan_subst() which calls
11667    scan_str().  tr/// and y/// make yylex() call scan_trans() which
11668    calls scan_str().
11669
11670    It skips whitespace before the string starts, and treats the first
11671    character as the delimiter.  If the delimiter is one of ([{< then
11672    the corresponding "close" character )]}> is used as the closing
11673    delimiter.  It allows quoting of delimiters, and if the string has
11674    balanced delimiters ([{<>}]) it allows nesting.
11675
11676    On success, the SV with the resulting string is put into lex_stuff or,
11677    if that is already non-NULL, into lex_repl. The second case occurs only
11678    when parsing the RHS of the special constructs s/// and tr/// (y///).
11679    For convenience, the terminating delimiter character is stuffed into
11680    SvIVX of the SV.
11681 */
11682
11683 char *
11684 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11685                  char **delimp
11686     )
11687 {
11688     SV *sv;                     /* scalar value: string */
11689     char *s = start;            /* current position in the buffer */
11690     char *to;                   /* current position in the sv's data */
11691     int brackets = 1;           /* bracket nesting level */
11692     bool d_is_utf8 = FALSE;     /* is there any utf8 content? */
11693     UV open_delim_code;         /* code point */
11694     char open_delim_str[UTF8_MAXBYTES+1];
11695     STRLEN delim_byte_len;      /* each delimiter currently is the same number
11696                                    of bytes */
11697     line_t herelines;
11698
11699     /* The only non-UTF character that isn't a stand alone grapheme is
11700      * white-space, hence can't be a delimiter. */
11701     const char * non_grapheme_msg = "Use of unassigned code point or"
11702                                     " non-standalone grapheme for a delimiter"
11703                                     " is not allowed";
11704     PERL_ARGS_ASSERT_SCAN_STR;
11705
11706     /* skip space before the delimiter */
11707     if (isSPACE(*s)) {  /* skipspace can change the buffer 's' is in, so
11708                            'start' also has to change */
11709         s = start = skipspace(s);
11710     }
11711
11712     /* mark where we are, in case we need to report errors */
11713     CLINE;
11714
11715     /* after skipping whitespace, the next character is the delimiter */
11716     if (! UTF || UTF8_IS_INVARIANT(*s)) {
11717         open_delim_code   = (U8) *s;
11718         open_delim_str[0] =      *s;
11719         delim_byte_len = 1;
11720     }
11721     else {
11722         open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
11723                                             &delim_byte_len);
11724         if (UNLIKELY(! is_grapheme((U8 *) start,
11725                                    (U8 *) s,
11726                                    (U8 *) PL_bufend,
11727                                    open_delim_code)))
11728         {
11729             yyerror(non_grapheme_msg);
11730         }
11731
11732         Copy(s, open_delim_str, delim_byte_len, char);
11733     }
11734     open_delim_str[delim_byte_len] = '\0';  /* Only for safety */
11735
11736
11737     /* mark where we are */
11738     PL_multi_start = CopLINE(PL_curcop);
11739     PL_multi_open = open_delim_code;
11740     herelines = PL_parser->herelines;
11741
11742     const char * legal_paired_opening_delims;
11743     const char * legal_paired_closing_delims;
11744     const char * deprecated_opening_delims;
11745     if (FEATURE_MORE_DELIMS_IS_ENABLED) {
11746         if (UTF) {
11747             legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
11748             legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
11749
11750             /* We are deprecating using a closing delimiter as the opening, in
11751              * case we want in the future to accept them reversed.  The string
11752              * may include ones that are legal, but the code below won't look
11753              * at this string unless it didn't find a legal opening one */
11754             deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
11755         }
11756         else {
11757             legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
11758             legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
11759             deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11760         }
11761     }
11762     else {
11763         legal_paired_opening_delims = "([{<";
11764         legal_paired_closing_delims = ")]}>";
11765         deprecated_opening_delims = (UTF)
11766                                     ? DEPRECATED_OPENING_UTF8_BRACKETS
11767                                     : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11768     }
11769
11770     const char * legal_paired_opening_delims_end = legal_paired_opening_delims
11771                                           + strlen(legal_paired_opening_delims);
11772     const char * deprecated_delims_end = deprecated_opening_delims
11773                                 + strlen(deprecated_opening_delims);
11774
11775     const char * close_delim_str = open_delim_str;
11776     UV close_delim_code = open_delim_code;
11777
11778     /* If the delimiter has a mirror-image closing one, get it */
11779     const char *tmps = ninstr(legal_paired_opening_delims,
11780                               legal_paired_opening_delims_end,
11781                               open_delim_str, open_delim_str + delim_byte_len);
11782     if (tmps) {
11783         /* Here, there is a paired delimiter, and tmps points to its position
11784            in the string of the accepted opening paired delimiters.  The
11785            corresponding position in the string of closing ones is the
11786            beginning of the paired mate.  Both contain the same number of
11787            bytes. */
11788         close_delim_str = legal_paired_closing_delims
11789                         + (tmps - legal_paired_opening_delims);
11790
11791         /* The list of paired delimiters contains all the ASCII ones that have
11792          * always been legal, and no other ASCIIs.  Don't raise a message if
11793          * using one of these */
11794         if (! isASCII(open_delim_code)) {
11795             Perl_ck_warner_d(aTHX_
11796                              packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
11797                              "Use of '%" UTF8f "' is experimental as a string delimiter",
11798                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
11799         }
11800
11801         close_delim_code = (UTF)
11802                            ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
11803                            : * (U8 *) close_delim_str;
11804     }
11805     else {  /* Here, the delimiter isn't paired, hence the close is the same as
11806                the open; and has already been set up.  But make sure it isn't
11807                deprecated to use this particular delimiter, as we plan
11808                eventually to make it paired. */
11809         if (ninstr(deprecated_opening_delims, deprecated_delims_end,
11810                    open_delim_str, open_delim_str + delim_byte_len))
11811         {
11812             Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED__DELIMITER_WILL_BE_PAIRED),
11813                              "Use of '%" UTF8f "' is deprecated as a string delimiter",
11814                              UTF8fARG(UTF, delim_byte_len, open_delim_str));
11815         }
11816
11817         /* Note that a NUL may be used as a delimiter, and this happens when
11818          * delimiting an empty string, and no special handling for it is
11819          * needed, as ninstr() calls are used */
11820     }
11821
11822     PL_multi_close = close_delim_code;
11823
11824     if (PL_multi_open == PL_multi_close) {
11825         keep_bracketed_quoted = FALSE;
11826     }
11827
11828     /* create a new SV to hold the contents.  79 is the SV's initial length.
11829        What a random number. */
11830     sv = newSV_type(SVt_PVIV);
11831     sv_grow_fresh(sv, 79);
11832     SvIV_set(sv, close_delim_code);
11833     (void)SvPOK_only(sv);               /* validate pointer */
11834
11835     /* move past delimiter and try to read a complete string */
11836     if (keep_delims)
11837         sv_catpvn(sv, s, delim_byte_len);
11838     s += delim_byte_len;
11839     for (;;) {
11840         /* extend sv if need be */
11841         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11842         /* set 'to' to the next character in the sv's string */
11843         to = SvPVX(sv)+SvCUR(sv);
11844
11845         /* read until we run out of string, or we find the closing delimiter */
11846         while (s < PL_bufend) {
11847             /* embedded newlines increment the line count */
11848             if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11849                 COPLINE_INC_WITH_HERELINES;
11850
11851             /* backslashes can escape the closing delimiter */
11852             if (   *s == '\\' && s < PL_bufend - delim_byte_len
11853
11854                    /* ... but not if the delimiter itself is a backslash */
11855                 && close_delim_code != '\\')
11856             {
11857                 /* Here, we have an escaping backslash.  If we're supposed to
11858                  * discard those that escape the closing delimiter, just
11859                  * discard this one */
11860                 if (   !  keep_bracketed_quoted
11861                     &&   (    memEQ(s + 1,  open_delim_str, delim_byte_len)
11862                           ||  (   PL_multi_open == PL_multi_close
11863                                && re_reparse && s[1] == '\\')
11864                           ||  memEQ(s + 1, close_delim_str, delim_byte_len)))
11865                 {
11866                     s++;
11867                 }
11868                 else /* any other escapes are simply copied straight through */
11869                     *to++ = *s++;
11870             }
11871             else if (   s < PL_bufend - (delim_byte_len - 1)
11872                      && memEQ(s, close_delim_str, delim_byte_len)
11873                      && --brackets <= 0)
11874             {
11875                 /* Found unescaped closing delimiter, unnested if we care about
11876                  * that; so are done.
11877                  *
11878                  * In the case of the opening and closing delimiters being
11879                  * different, we have to deal with nesting; the conditional
11880                  * above makes sure we don't get here until the nesting level,
11881                  * 'brackets', is back down to zero.  In the other case,
11882                  * nesting isn't an issue, and 'brackets' never can get
11883                  * incremented above 0, so will come here at the first closing
11884                  * delimiter.
11885                  *
11886                  * Only grapheme delimiters are legal. */
11887                 if (   UTF  /* All Non-UTF-8's are graphemes */
11888                     && UNLIKELY(! is_grapheme((U8 *) start,
11889                                               (U8 *) s,
11890                                               (U8 *) PL_bufend,
11891                                               close_delim_code)))
11892                 {
11893                     yyerror(non_grapheme_msg);
11894                 }
11895
11896                 break;
11897             }
11898                         /* No nesting if open eq close */
11899             else if (   PL_multi_open != PL_multi_close
11900                      && s < PL_bufend - (delim_byte_len - 1)
11901                      && memEQ(s, open_delim_str, delim_byte_len))
11902             {
11903                 brackets++;
11904             }
11905
11906             /* Here, still in the middle of the string; copy this character */
11907             if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
11908                 *to++ = *s++;
11909             }
11910             else {
11911                 size_t this_char_len = UTF8SKIP(s);
11912                 Copy(s, to, this_char_len, char);
11913                 s  += this_char_len;
11914                 to += this_char_len;
11915
11916                 d_is_utf8 = TRUE;
11917             }
11918         } /* End of loop through buffer */
11919
11920         /* Here, found end of the string, OR ran out of buffer: terminate the
11921          * copied string and update the sv's end-of-string */
11922         *to = '\0';
11923         SvCUR_set(sv, to - SvPVX_const(sv));
11924
11925         /*
11926          * this next chunk reads more into the buffer if we're not done yet
11927          */
11928
11929         if (s < PL_bufend)
11930             break;              /* handle case where we are done yet :-) */
11931
11932 #ifndef PERL_STRICT_CR
11933         if (to - SvPVX_const(sv) >= 2) {
11934             if (   (to[-2] == '\r' && to[-1] == '\n')
11935                 || (to[-2] == '\n' && to[-1] == '\r'))
11936             {
11937                 to[-2] = '\n';
11938                 to--;
11939                 SvCUR_set(sv, to - SvPVX_const(sv));
11940             }
11941             else if (to[-1] == '\r')
11942                 to[-1] = '\n';
11943         }
11944         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11945             to[-1] = '\n';
11946 #endif
11947
11948         /* if we're out of file, or a read fails, bail and reset the current
11949            line marker so we can report where the unterminated string began
11950         */
11951         COPLINE_INC_WITH_HERELINES;
11952         PL_bufptr = PL_bufend;
11953         if (!lex_next_chunk(0)) {
11954             ASSUME(sv);
11955             SvREFCNT_dec(sv);
11956             CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11957             return NULL;
11958         }
11959         s = start = PL_bufptr;
11960     } /* End of infinite loop */
11961
11962     /* at this point, we have successfully read the delimited string */
11963
11964     if (keep_delims)
11965             sv_catpvn(sv, s, delim_byte_len);
11966     s += delim_byte_len;
11967
11968     if (d_is_utf8)
11969         SvUTF8_on(sv);
11970
11971     PL_multi_end = CopLINE(PL_curcop);
11972     CopLINE_set(PL_curcop, PL_multi_start);
11973     PL_parser->herelines = herelines;
11974
11975     /* if we allocated too much space, give some back */
11976     if (SvCUR(sv) + 5 < SvLEN(sv)) {
11977         SvLEN_set(sv, SvCUR(sv) + 1);
11978         SvPV_shrink_to_cur(sv);
11979     }
11980
11981     /* decide whether this is the first or second quoted string we've read
11982        for this op
11983     */
11984
11985     if (PL_lex_stuff)
11986         PL_parser->lex_sub_repl = sv;
11987     else
11988         PL_lex_stuff = sv;
11989     if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s;
11990     return s;
11991 }
11992
11993 /*
11994   scan_num
11995   takes: pointer to position in buffer
11996   returns: pointer to new position in buffer
11997   side-effects: builds ops for the constant in pl_yylval.op
11998
11999   Read a number in any of the formats that Perl accepts:
12000
12001   \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*)      12 12.34 12.
12002   \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*)                     .34
12003   0b[01](_?[01])*                                       binary integers
12004   0o?[0-7](_?[0-7])*                                    octal integers
12005   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*                         hexadecimal integers
12006   0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+   hexadecimal floats
12007
12008   Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
12009   thing it reads.
12010
12011   If it reads a number without a decimal point or an exponent, it will
12012   try converting the number to an integer and see if it can do so
12013   without loss of precision.
12014 */
12015
12016 char *
12017 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
12018 {
12019     const char *s = start;      /* current position in buffer */
12020     char *d;                    /* destination in temp buffer */
12021     char *e;                    /* end of temp buffer */
12022     NV nv;                              /* number read, as a double */
12023     SV *sv = NULL;                      /* place to put the converted number */
12024     bool floatit;                       /* boolean: int or float? */
12025     const char *lastub = NULL;          /* position of last underbar */
12026     static const char* const number_too_long = "Number too long";
12027     bool warned_about_underscore = 0;
12028     I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
12029 #define WARN_ABOUT_UNDERSCORE() \
12030         do { \
12031             if (!warned_about_underscore) { \
12032                 warned_about_underscore = 1; \
12033                 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
12034                                "Misplaced _ in number"); \
12035             } \
12036         } while(0)
12037     /* Hexadecimal floating point.
12038      *
12039      * In many places (where we have quads and NV is IEEE 754 double)
12040      * we can fit the mantissa bits of a NV into an unsigned quad.
12041      * (Note that UVs might not be quads even when we have quads.)
12042      * This will not work everywhere, though (either no quads, or
12043      * using long doubles), in which case we have to resort to NV,
12044      * which will probably mean horrible loss of precision due to
12045      * multiple fp operations. */
12046     bool hexfp = FALSE;
12047     int total_bits = 0;
12048     int significant_bits = 0;
12049 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
12050 #  define HEXFP_UQUAD
12051     Uquad_t hexfp_uquad = 0;
12052     int hexfp_frac_bits = 0;
12053 #else
12054 #  define HEXFP_NV
12055     NV hexfp_nv = 0.0;
12056 #endif
12057     NV hexfp_mult = 1.0;
12058     UV high_non_zero = 0; /* highest digit */
12059     int non_zero_integer_digits = 0;
12060     bool new_octal = FALSE;     /* octal with "0o" prefix */
12061
12062     PERL_ARGS_ASSERT_SCAN_NUM;
12063
12064     /* We use the first character to decide what type of number this is */
12065
12066     switch (*s) {
12067     default:
12068         Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
12069
12070     /* if it starts with a 0, it could be an octal number, a decimal in
12071        0.13 disguise, or a hexadecimal number, or a binary number. */
12072     case '0':
12073         {
12074           /* variables:
12075              u          holds the "number so far"
12076              overflowed was the number more than we can hold?
12077
12078              Shift is used when we add a digit.  It also serves as an "are
12079              we in octal/hex/binary?" indicator to disallow hex characters
12080              when in octal mode.
12081            */
12082             NV n = 0.0;
12083             UV u = 0;
12084             bool overflowed = FALSE;
12085             bool just_zero  = TRUE;     /* just plain 0 or binary number? */
12086             bool has_digs = FALSE;
12087             static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12088             static const char* const bases[5] =
12089               { "", "binary", "", "octal", "hexadecimal" };
12090             static const char* const Bases[5] =
12091               { "", "Binary", "", "Octal", "Hexadecimal" };
12092             static const char* const maxima[5] =
12093               { "",
12094                 "0b11111111111111111111111111111111",
12095                 "",
12096                 "037777777777",
12097                 "0xffffffff" };
12098
12099             /* check for hex */
12100             if (isALPHA_FOLD_EQ(s[1], 'x')) {
12101                 shift = 4;
12102                 s += 2;
12103                 just_zero = FALSE;
12104             } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
12105                 shift = 1;
12106                 s += 2;
12107                 just_zero = FALSE;
12108             }
12109             /* check for a decimal in disguise */
12110             else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
12111                 goto decimal;
12112             /* so it must be octal */
12113             else {
12114                 shift = 3;
12115                 s++;
12116                 if (isALPHA_FOLD_EQ(*s, 'o')) {
12117                     s++;
12118                     just_zero = FALSE;
12119                     new_octal = TRUE;
12120                 }
12121             }
12122
12123             if (*s == '_') {
12124                 WARN_ABOUT_UNDERSCORE();
12125                lastub = s++;
12126             }
12127
12128             /* read the rest of the number */
12129             for (;;) {
12130                 /* x is used in the overflow test,
12131                    b is the digit we're adding on. */
12132                 UV x, b;
12133
12134                 switch (*s) {
12135
12136                 /* if we don't mention it, we're done */
12137                 default:
12138                     goto out;
12139
12140                 /* _ are ignored -- but warned about if consecutive */
12141                 case '_':
12142                     if (lastub && s == lastub + 1)
12143                         WARN_ABOUT_UNDERSCORE();
12144                     lastub = s++;
12145                     break;
12146
12147                 /* 8 and 9 are not octal */
12148                 case '8': case '9':
12149                     if (shift == 3)
12150                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
12151                     /* FALLTHROUGH */
12152
12153                 /* octal digits */
12154                 case '2': case '3': case '4':
12155                 case '5': case '6': case '7':
12156                     if (shift == 1)
12157                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
12158                     /* FALLTHROUGH */
12159
12160                 case '0': case '1':
12161                     b = *s++ & 15;              /* ASCII digit -> value of digit */
12162                     goto digit;
12163
12164                 /* hex digits */
12165                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12166                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
12167                     /* make sure they said 0x */
12168                     if (shift != 4)
12169                         goto out;
12170                     b = (*s++ & 7) + 9;
12171
12172                     /* Prepare to put the digit we have onto the end
12173                        of the number so far.  We check for overflows.
12174                     */
12175
12176                   digit:
12177                     just_zero = FALSE;
12178                     has_digs = TRUE;
12179                     if (!overflowed) {
12180                         assert(shift >= 0);
12181                         x = u << shift; /* make room for the digit */
12182
12183                         total_bits += shift;
12184
12185                         if ((x >> shift) != u
12186                             && !(PL_hints & HINT_NEW_BINARY)) {
12187                             overflowed = TRUE;
12188                             n = (NV) u;
12189                             Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12190                                              "Integer overflow in %s number",
12191                                              bases[shift]);
12192                         } else
12193                             u = x | b;          /* add the digit to the end */
12194                     }
12195                     if (overflowed) {
12196                         n *= nvshift[shift];
12197                         /* If an NV has not enough bits in its
12198                          * mantissa to represent an UV this summing of
12199                          * small low-order numbers is a waste of time
12200                          * (because the NV cannot preserve the
12201                          * low-order bits anyway): we could just
12202                          * remember when did we overflow and in the
12203                          * end just multiply n by the right
12204                          * amount. */
12205                         n += (NV) b;
12206                     }
12207
12208                     if (high_non_zero == 0 && b > 0)
12209                         high_non_zero = b;
12210
12211                     if (high_non_zero)
12212                         non_zero_integer_digits++;
12213
12214                     /* this could be hexfp, but peek ahead
12215                      * to avoid matching ".." */
12216                     if (UNLIKELY(HEXFP_PEEK(s))) {
12217                         goto out;
12218                     }
12219
12220                     break;
12221                 }
12222             }
12223
12224           /* if we get here, we had success: make a scalar value from
12225              the number.
12226           */
12227           out:
12228
12229             /* final misplaced underbar check */
12230             if (s[-1] == '_')
12231                 WARN_ABOUT_UNDERSCORE();
12232
12233             if (UNLIKELY(HEXFP_PEEK(s))) {
12234                 /* Do sloppy (on the underbars) but quick detection
12235                  * (and value construction) for hexfp, the decimal
12236                  * detection will shortly be more thorough with the
12237                  * underbar checks. */
12238                 const char* h = s;
12239                 significant_bits = non_zero_integer_digits * shift;
12240 #ifdef HEXFP_UQUAD
12241                 hexfp_uquad = u;
12242 #else /* HEXFP_NV */
12243                 hexfp_nv = u;
12244 #endif
12245                 /* Ignore the leading zero bits of
12246                  * the high (first) non-zero digit. */
12247                 if (high_non_zero) {
12248                     if (high_non_zero < 0x8)
12249                         significant_bits--;
12250                     if (high_non_zero < 0x4)
12251                         significant_bits--;
12252                     if (high_non_zero < 0x2)
12253                         significant_bits--;
12254                 }
12255
12256                 if (*h == '.') {
12257 #ifdef HEXFP_NV
12258                     NV nv_mult = 1.0;
12259 #endif
12260                     bool accumulate = TRUE;
12261                     U8 b = 0; /* silence compiler warning */
12262                     int lim = 1 << shift;
12263                     for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
12264                                *h == '_'); h++) {
12265                         if (isXDIGIT(*h)) {
12266                             significant_bits += shift;
12267 #ifdef HEXFP_UQUAD
12268                             if (accumulate) {
12269                                 if (significant_bits < NV_MANT_DIG) {
12270                                     /* We are in the long "run" of xdigits,
12271                                      * accumulate the full four bits. */
12272                                     assert(shift >= 0);
12273                                     hexfp_uquad <<= shift;
12274                                     hexfp_uquad |= b;
12275                                     hexfp_frac_bits += shift;
12276                                 } else if (significant_bits - shift < NV_MANT_DIG) {
12277                                     /* We are at a hexdigit either at,
12278                                      * or straddling, the edge of mantissa.
12279                                      * We will try grabbing as many as
12280                                      * possible bits. */
12281                                     int tail =
12282                                       significant_bits - NV_MANT_DIG;
12283                                     if (tail <= 0)
12284                                        tail += shift;
12285                                     assert(tail >= 0);
12286                                     hexfp_uquad <<= tail;
12287                                     assert((shift - tail) >= 0);
12288                                     hexfp_uquad |= b >> (shift - tail);
12289                                     hexfp_frac_bits += tail;
12290
12291                                     /* Ignore the trailing zero bits
12292                                      * of the last non-zero xdigit.
12293                                      *
12294                                      * The assumption here is that if
12295                                      * one has input of e.g. the xdigit
12296                                      * eight (0x8), there is only one
12297                                      * bit being input, not the full
12298                                      * four bits.  Conversely, if one
12299                                      * specifies a zero xdigit, the
12300                                      * assumption is that one really
12301                                      * wants all those bits to be zero. */
12302                                     if (b) {
12303                                         if ((b & 0x1) == 0x0) {
12304                                             significant_bits--;
12305                                             if ((b & 0x2) == 0x0) {
12306                                                 significant_bits--;
12307                                                 if ((b & 0x4) == 0x0) {
12308                                                     significant_bits--;
12309                                                 }
12310                                             }
12311                                         }
12312                                     }
12313
12314                                     accumulate = FALSE;
12315                                 }
12316                             } else {
12317                                 /* Keep skipping the xdigits, and
12318                                  * accumulating the significant bits,
12319                                  * but do not shift the uquad
12320                                  * (which would catastrophically drop
12321                                  * high-order bits) or accumulate the
12322                                  * xdigits anymore. */
12323                             }
12324 #else /* HEXFP_NV */
12325                             if (accumulate) {
12326                                 nv_mult /= nvshift[shift];
12327                                 if (nv_mult > 0.0)
12328                                     hexfp_nv += b * nv_mult;
12329                                 else
12330                                     accumulate = FALSE;
12331                             }
12332 #endif
12333                         }
12334                         if (significant_bits >= NV_MANT_DIG)
12335                             accumulate = FALSE;
12336                     }
12337                 }
12338
12339                 if ((total_bits > 0 || significant_bits > 0) &&
12340                     isALPHA_FOLD_EQ(*h, 'p')) {
12341                     bool negexp = FALSE;
12342                     h++;
12343                     if (*h == '+')
12344                         h++;
12345                     else if (*h == '-') {
12346                         negexp = TRUE;
12347                         h++;
12348                     }
12349                     if (isDIGIT(*h)) {
12350                         I32 hexfp_exp = 0;
12351                         while (isDIGIT(*h) || *h == '_') {
12352                             if (isDIGIT(*h)) {
12353                                 hexfp_exp *= 10;
12354                                 hexfp_exp += *h - '0';
12355 #ifdef NV_MIN_EXP
12356                                 if (negexp
12357                                     && -hexfp_exp < NV_MIN_EXP - 1) {
12358                                     /* NOTE: this means that the exponent
12359                                      * underflow warning happens for
12360                                      * the IEEE 754 subnormals (denormals),
12361                                      * because DBL_MIN_EXP etc are the lowest
12362                                      * possible binary (or, rather, DBL_RADIX-base)
12363                                      * exponent for normals, not subnormals.
12364                                      *
12365                                      * This may or may not be a good thing. */
12366                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12367                                                    "Hexadecimal float: exponent underflow");
12368                                     break;
12369                                 }
12370 #endif
12371 #ifdef NV_MAX_EXP
12372                                 if (!negexp
12373                                     && hexfp_exp > NV_MAX_EXP - 1) {
12374                                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12375                                                    "Hexadecimal float: exponent overflow");
12376                                     break;
12377                                 }
12378 #endif
12379                             }
12380                             h++;
12381                         }
12382                         if (negexp)
12383                             hexfp_exp = -hexfp_exp;
12384 #ifdef HEXFP_UQUAD
12385                         hexfp_exp -= hexfp_frac_bits;
12386 #endif
12387                         hexfp_mult = Perl_pow(2.0, hexfp_exp);
12388                         hexfp = TRUE;
12389                         goto decimal;
12390                     }
12391                 }
12392             }
12393
12394             if (!just_zero && !has_digs) {
12395                 /* 0x, 0o or 0b with no digits, treat it as an error.
12396                    Originally this backed up the parse before the b or
12397                    x, but that has the potential for silent changes in
12398                    behaviour, like for: "0x.3" and "0x+$foo".
12399                 */
12400                 const char *d = s;
12401                 char *oldbp = PL_bufptr;
12402                 if (*d) ++d; /* so the user sees the bad non-digit */
12403                 PL_bufptr = (char *)d; /* so yyerror reports the context */
12404                 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
12405                                   bases[shift]));
12406                 PL_bufptr = oldbp;
12407             }
12408
12409             if (overflowed) {
12410                 if (n > 4294967295.0)
12411                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12412                                    "%s number > %s non-portable",
12413                                    Bases[shift],
12414                                    new_octal ? "0o37777777777" : maxima[shift]);
12415                 sv = newSVnv(n);
12416             }
12417             else {
12418 #if UVSIZE > 4
12419                 if (u > 0xffffffff)
12420                     Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12421                                    "%s number > %s non-portable",
12422                                    Bases[shift],
12423                                    new_octal ? "0o37777777777" : maxima[shift]);
12424 #endif
12425                 sv = newSVuv(u);
12426             }
12427             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12428                 sv = new_constant(start, s - start, "integer",
12429                                   sv, NULL, NULL, 0, NULL);
12430             else if (PL_hints & HINT_NEW_BINARY)
12431                 sv = new_constant(start, s - start, "binary",
12432                                   sv, NULL, NULL, 0, NULL);
12433         }
12434         break;
12435
12436     /*
12437       handle decimal numbers.
12438       we're also sent here when we read a 0 as the first digit
12439     */
12440     case '1': case '2': case '3': case '4': case '5':
12441     case '6': case '7': case '8': case '9': case '.':
12442       decimal:
12443         d = PL_tokenbuf;
12444         e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12445         floatit = FALSE;
12446         if (hexfp) {
12447             floatit = TRUE;
12448             *d++ = '0';
12449             switch (shift) {
12450             case 4:
12451                 *d++ = 'x';
12452                 s = start + 2;
12453                 break;
12454             case 3:
12455                 if (new_octal) {
12456                     *d++ = 'o';
12457                     s = start + 2;
12458                     break;
12459                 }
12460                 s = start + 1;
12461                 break;
12462             case 1:
12463                 *d++ = 'b';
12464                 s = start + 2;
12465                 break;
12466             default:
12467                 NOT_REACHED; /* NOTREACHED */
12468             }
12469         }
12470
12471         /* read next group of digits and _ and copy into d */
12472         while (isDIGIT(*s)
12473                || *s == '_'
12474                || UNLIKELY(hexfp && isXDIGIT(*s)))
12475         {
12476             /* skip underscores, checking for misplaced ones
12477                if -w is on
12478             */
12479             if (*s == '_') {
12480                 if (lastub && s == lastub + 1)
12481                     WARN_ABOUT_UNDERSCORE();
12482                 lastub = s++;
12483             }
12484             else {
12485                 /* check for end of fixed-length buffer */
12486                 if (d >= e)
12487                     Perl_croak(aTHX_ "%s", number_too_long);
12488                 /* if we're ok, copy the character */
12489                 *d++ = *s++;
12490             }
12491         }
12492
12493         /* final misplaced underbar check */
12494         if (lastub && s == lastub + 1)
12495             WARN_ABOUT_UNDERSCORE();
12496
12497         /* read a decimal portion if there is one.  avoid
12498            3..5 being interpreted as the number 3. followed
12499            by .5
12500         */
12501         if (*s == '.' && s[1] != '.') {
12502             floatit = TRUE;
12503             *d++ = *s++;
12504
12505             if (*s == '_') {
12506                 WARN_ABOUT_UNDERSCORE();
12507                 lastub = s;
12508             }
12509
12510             /* copy, ignoring underbars, until we run out of digits.
12511             */
12512             for (; isDIGIT(*s)
12513                    || *s == '_'
12514                    || UNLIKELY(hexfp && isXDIGIT(*s));
12515                  s++)
12516             {
12517                 /* fixed length buffer check */
12518                 if (d >= e)
12519                     Perl_croak(aTHX_ "%s", number_too_long);
12520                 if (*s == '_') {
12521                    if (lastub && s == lastub + 1)
12522                         WARN_ABOUT_UNDERSCORE();
12523                    lastub = s;
12524                 }
12525                 else
12526                     *d++ = *s;
12527             }
12528             /* fractional part ending in underbar? */
12529             if (s[-1] == '_')
12530                 WARN_ABOUT_UNDERSCORE();
12531             if (*s == '.' && isDIGIT(s[1])) {
12532                 /* oops, it's really a v-string, but without the "v" */
12533                 s = start;
12534                 goto vstring;
12535             }
12536         }
12537
12538         /* read exponent part, if present */
12539         if ((isALPHA_FOLD_EQ(*s, 'e')
12540               || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
12541             && memCHRs("+-0123456789_", s[1]))
12542         {
12543             int exp_digits = 0;
12544             const char *save_s = s;
12545             char * save_d = d;
12546
12547             /* regardless of whether user said 3E5 or 3e5, use lower 'e',
12548                ditto for p (hexfloats) */
12549             if ((isALPHA_FOLD_EQ(*s, 'e'))) {
12550                 /* At least some Mach atof()s don't grok 'E' */
12551                 *d++ = 'e';
12552             }
12553             else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
12554                 *d++ = 'p';
12555             }
12556
12557             s++;
12558
12559
12560             /* stray preinitial _ */
12561             if (*s == '_') {
12562                 WARN_ABOUT_UNDERSCORE();
12563                 lastub = s++;
12564             }
12565
12566             /* allow positive or negative exponent */
12567             if (*s == '+' || *s == '-')
12568                 *d++ = *s++;
12569
12570             /* stray initial _ */
12571             if (*s == '_') {
12572                 WARN_ABOUT_UNDERSCORE();
12573                 lastub = s++;
12574             }
12575
12576             /* read digits of exponent */
12577             while (isDIGIT(*s) || *s == '_') {
12578                 if (isDIGIT(*s)) {
12579                     ++exp_digits;
12580                     if (d >= e)
12581                         Perl_croak(aTHX_ "%s", number_too_long);
12582                     *d++ = *s++;
12583                 }
12584                 else {
12585                    if (((lastub && s == lastub + 1)
12586                         || (!isDIGIT(s[1]) && s[1] != '_')))
12587                         WARN_ABOUT_UNDERSCORE();
12588                    lastub = s++;
12589                 }
12590             }
12591
12592             if (!exp_digits) {
12593                 /* no exponent digits, the [eEpP] could be for something else,
12594                  * though in practice we don't get here for p since that's preparsed
12595                  * earlier, and results in only the 0xX being consumed, so behave similarly
12596                  * for decimal floats and consume only the D.DD, leaving the [eE] to the
12597                  * next token.
12598                  */
12599                 s = save_s;
12600                 d = save_d;
12601             }
12602             else {
12603                 floatit = TRUE;
12604             }
12605         }
12606
12607
12608         /*
12609            We try to do an integer conversion first if no characters
12610            indicating "float" have been found.
12611          */
12612
12613         if (!floatit) {
12614             UV uv;
12615             const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12616
12617             if (flags == IS_NUMBER_IN_UV) {
12618               if (uv <= IV_MAX)
12619                 sv = newSViv(uv); /* Prefer IVs over UVs. */
12620               else
12621                 sv = newSVuv(uv);
12622             } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12623               if (uv <= (UV) IV_MIN)
12624                 sv = newSViv(-(IV)uv);
12625               else
12626                 floatit = TRUE;
12627             } else
12628               floatit = TRUE;
12629         }
12630         if (floatit) {
12631             /* terminate the string */
12632             *d = '\0';
12633             if (UNLIKELY(hexfp)) {
12634 #  ifdef NV_MANT_DIG
12635                 if (significant_bits > NV_MANT_DIG)
12636                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12637                                    "Hexadecimal float: mantissa overflow");
12638 #  endif
12639 #ifdef HEXFP_UQUAD
12640                 nv = hexfp_uquad * hexfp_mult;
12641 #else /* HEXFP_NV */
12642                 nv = hexfp_nv * hexfp_mult;
12643 #endif
12644             } else {
12645                 nv = Atof(PL_tokenbuf);
12646             }
12647             sv = newSVnv(nv);
12648         }
12649
12650         if ( floatit
12651              ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12652             const char *const key = floatit ? "float" : "integer";
12653             const STRLEN keylen = floatit ? 5 : 7;
12654             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12655                                 key, keylen, sv, NULL, NULL, 0, NULL);
12656         }
12657         break;
12658
12659     /* if it starts with a v, it could be a v-string */
12660     case 'v':
12661     vstring:
12662                 sv = newSV(5); /* preallocate storage space */
12663                 ENTER_with_name("scan_vstring");
12664                 SAVEFREESV(sv);
12665                 s = scan_vstring(s, PL_bufend, sv);
12666                 SvREFCNT_inc_simple_void_NN(sv);
12667                 LEAVE_with_name("scan_vstring");
12668         break;
12669     }
12670
12671     /* make the op for the constant and return */
12672
12673     if (sv)
12674         lvalp->opval = newSVOP(OP_CONST, 0, sv);
12675     else
12676         lvalp->opval = NULL;
12677
12678     return (char *)s;
12679 }
12680
12681 STATIC char *
12682 S_scan_formline(pTHX_ char *s)
12683 {
12684     SV * const stuff = newSVpvs("");
12685     bool needargs = FALSE;
12686     bool eofmt = FALSE;
12687
12688     PERL_ARGS_ASSERT_SCAN_FORMLINE;
12689
12690     while (!needargs) {
12691         char *eol;
12692         if (*s == '.') {
12693             char *t = s+1;
12694 #ifdef PERL_STRICT_CR
12695             while (SPACE_OR_TAB(*t))
12696                 t++;
12697 #else
12698             while (SPACE_OR_TAB(*t) || *t == '\r')
12699                 t++;
12700 #endif
12701             if (*t == '\n' || t == PL_bufend) {
12702                 eofmt = TRUE;
12703                 break;
12704             }
12705         }
12706         eol = (char *) memchr(s,'\n',PL_bufend-s);
12707         if (! eol) {
12708             eol = PL_bufend;
12709         }
12710         else {
12711             eol++;
12712         }
12713         if (*s != '#') {
12714             char *t;
12715             for (t = s; t < eol; t++) {
12716                 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12717                     needargs = FALSE;
12718                     goto enough;        /* ~~ must be first line in formline */
12719                 }
12720                 if (*t == '@' || *t == '^')
12721                     needargs = TRUE;
12722             }
12723             if (eol > s) {
12724                 sv_catpvn(stuff, s, eol-s);
12725 #ifndef PERL_STRICT_CR
12726                 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12727                     char *end = SvPVX(stuff) + SvCUR(stuff);
12728                     end[-2] = '\n';
12729                     end[-1] = '\0';
12730                     SvCUR_set(stuff, SvCUR(stuff) - 1);
12731                 }
12732 #endif
12733             }
12734             else
12735               break;
12736         }
12737         s = (char*)eol;
12738         if ((PL_rsfp || PL_parser->filtered)
12739          && PL_parser->form_lex_state == LEX_NORMAL) {
12740             bool got_some;
12741             PL_bufptr = PL_bufend;
12742             COPLINE_INC_WITH_HERELINES;
12743             got_some = lex_next_chunk(0);
12744             CopLINE_dec(PL_curcop);
12745             s = PL_bufptr;
12746             if (!got_some)
12747                 break;
12748         }
12749         incline(s, PL_bufend);
12750     }
12751   enough:
12752     if (!SvCUR(stuff) || needargs)
12753         PL_lex_state = PL_parser->form_lex_state;
12754     if (SvCUR(stuff)) {
12755         PL_expect = XSTATE;
12756         if (needargs) {
12757             const char *s2 = s;
12758             while (isSPACE(*s2) && *s2 != '\n')
12759                 s2++;
12760             if (*s2 == '{') {
12761                 PL_expect = XTERMBLOCK;
12762                 NEXTVAL_NEXTTOKE.ival = 0;
12763                 force_next(KW_DO);
12764             }
12765             NEXTVAL_NEXTTOKE.ival = 0;
12766             force_next(FORMLBRACK);
12767         }
12768         if (!IN_BYTES) {
12769             if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12770                 SvUTF8_on(stuff);
12771         }
12772         NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12773         force_next(THING);
12774     }
12775     else {
12776         SvREFCNT_dec(stuff);
12777         if (eofmt)
12778             PL_lex_formbrack = 0;
12779     }
12780     return s;
12781 }
12782
12783 /*
12784 =for apidoc start_subparse
12785
12786 Set things up for parsing a subroutine.
12787
12788 If C<is_format> is non-zero, the input is to be considered a format sub
12789 (a specialised sub used to implement perl's C<format> feature); else a
12790 normal C<sub>.
12791
12792 C<flags> are added to the flags for C<PL_compcv>.  C<flags> may include the
12793 C<CVf_IsMETHOD> bit, which causes the new subroutine to be a method.
12794
12795 This returns the value of C<PL_savestack_ix> that was in effect upon entry to
12796 the function;
12797
12798 =cut
12799 */
12800
12801 I32
12802 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12803 {
12804     const I32 oldsavestack_ix = PL_savestack_ix;
12805     CV* const outsidecv = PL_compcv;
12806     bool is_method = flags & CVf_IsMETHOD;
12807
12808     if (is_method)
12809         croak_kw_unless_class("method");
12810
12811     SAVEI32(PL_subline);
12812     save_item(PL_subname);
12813     SAVESPTR(PL_compcv);
12814
12815     PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12816     CvFLAGS(PL_compcv) |= flags;
12817
12818     PL_subline = CopLINE(PL_curcop);
12819     CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12820     CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12821     CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12822     if (outsidecv && CvPADLIST(outsidecv))
12823         CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12824     if (is_method)
12825         class_prepare_method_parse(PL_compcv);
12826
12827     return oldsavestack_ix;
12828 }
12829
12830 /* If o represents a builtin attribute, apply it to cv and returns true.
12831  * Otherwise does nothing and returns false
12832  */
12833
12834 STATIC bool
12835 S_apply_builtin_cv_attribute(pTHX_ CV *cv, OP *o)
12836 {
12837     assert(o->op_type == OP_CONST);
12838     SV *sv = cSVOPo_sv;
12839     STRLEN len = SvCUR(sv);
12840
12841     /* NOTE: any CV attrs applied here need to be part of
12842        the CVf_BUILTIN_ATTRS define in cv.h! */
12843
12844     if(memEQs(SvPVX(sv), len, "lvalue"))
12845         CvLVALUE_on(cv);
12846     else if(memEQs(SvPVX(sv), len, "method"))
12847         CvNOWARN_AMBIGUOUS_on(cv);
12848     else if(memEQs(SvPVX(sv), len, "const")) {
12849         Perl_ck_warner_d(aTHX_
12850             packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
12851            ":const is experimental"
12852         );
12853         CvANONCONST_on(cv);
12854         if (!CvANON(cv))
12855             yyerror(":const is not permitted on named subroutines");
12856     }
12857     else
12858         return false;
12859
12860     return true;
12861 }
12862
12863 /*
12864 =for apidoc apply_builtin_cv_attributes
12865
12866 Given an OP_LIST containing attribute definitions, filter it for known builtin
12867 attributes to apply to the cv, returning a possibly-smaller list containing
12868 just the remaining ones.
12869
12870 =cut
12871 */
12872
12873 OP *
12874 Perl_apply_builtin_cv_attributes(pTHX_ CV *cv, OP *attrlist)
12875 {
12876     PERL_ARGS_ASSERT_APPLY_BUILTIN_CV_ATTRIBUTES;
12877
12878     if(!attrlist)
12879         return attrlist;
12880
12881     if(attrlist->op_type != OP_LIST) {
12882         /* Not in fact a list but just a single attribute */
12883         if(S_apply_builtin_cv_attribute(aTHX_ cv, attrlist)) {
12884             op_free(attrlist);
12885             return NULL;
12886         }
12887
12888         return attrlist;
12889     }
12890
12891     OP *prev = cLISTOPx(attrlist)->op_first;
12892     assert(prev->op_type == OP_PUSHMARK);
12893     OP *o = OpSIBLING(prev);
12894
12895     OP *next;
12896     for(; o; o = next) {
12897         next = OpSIBLING(o);
12898
12899         if(S_apply_builtin_cv_attribute(aTHX_ cv, o)) {
12900             op_sibling_splice(attrlist, prev, 1, NULL);
12901             op_free(o);
12902         }
12903         else {
12904             prev = o;
12905         }
12906     }
12907
12908     if(OpHAS_SIBLING(cLISTOPx(attrlist)->op_first))
12909         return attrlist;
12910
12911     /* The list is now entirely empty, we might as well discard it */
12912     op_free(attrlist);
12913     return NULL;
12914 }
12915
12916
12917 /* Do extra initialisation of a CV (typically one just created by
12918  * start_subparse()) if that CV is for a named sub
12919  */
12920
12921 void
12922 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12923 {
12924     PERL_ARGS_ASSERT_INIT_NAMED_CV;
12925
12926     if (nameop->op_type == OP_CONST) {
12927         const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12928         if (   strEQ(name, "BEGIN")
12929             || strEQ(name, "END")
12930             || strEQ(name, "INIT")
12931             || strEQ(name, "CHECK")
12932             || strEQ(name, "UNITCHECK")
12933         )
12934           CvSPECIAL_on(cv);
12935     }
12936     else
12937     /* State subs inside anonymous subs need to be
12938      clonable themselves. */
12939     if (   CvANON(CvOUTSIDE(cv))
12940         || CvCLONE(CvOUTSIDE(cv))
12941         || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12942                         CvOUTSIDE(cv)
12943                      ))[nameop->op_targ])
12944     )
12945       CvCLONE_on(cv);
12946 }
12947
12948
12949 static int
12950 S_yywarn(pTHX_ const char *const s, U32 flags)
12951 {
12952     PERL_ARGS_ASSERT_YYWARN;
12953
12954     PL_in_eval |= EVAL_WARNONLY;
12955     yyerror_pv(s, flags);
12956     return 0;
12957 }
12958
12959 void
12960 Perl_abort_execution(pTHX_ SV* msg_sv, const char * const name)
12961 {
12962     PERL_ARGS_ASSERT_ABORT_EXECUTION;
12963
12964     if (msg_sv) {
12965         if (PL_minus_c)
12966             Perl_croak(aTHX_ "%" SVf "%s had compilation errors.\n", SVfARG(msg_sv), name);
12967         else {
12968             Perl_croak(aTHX_
12969                     "%" SVf "Execution of %s aborted due to compilation errors.\n", SVfARG(msg_sv), name);
12970         }
12971     } else {
12972         if (PL_minus_c)
12973             Perl_croak(aTHX_ "%s had compilation errors.\n", name);
12974         else {
12975             Perl_croak(aTHX_
12976                     "Execution of %s aborted due to compilation errors.\n", name);
12977         }
12978     }
12979
12980     NOT_REACHED; /* NOTREACHED */
12981 }
12982
12983 void
12984 Perl_yyquit(pTHX)
12985 {
12986     /* Called, after at least one error has been found, to abort the parse now,
12987      * instead of trying to forge ahead */
12988
12989     yyerror_pvn(NULL, 0, 0);
12990 }
12991
12992 int
12993 Perl_yyerror(pTHX_ const char *const s)
12994 {
12995     PERL_ARGS_ASSERT_YYERROR;
12996     int r = yyerror_pvn(s, strlen(s), 0);
12997     return r;
12998 }
12999
13000 int
13001 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
13002 {
13003     PERL_ARGS_ASSERT_YYERROR_PV;
13004     int r = yyerror_pvn(s, strlen(s), flags);
13005     return r;
13006 }
13007
13008 int
13009 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
13010 {
13011     const char *context = NULL;
13012     int contlen = -1;
13013     SV *msg;
13014     SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
13015     int yychar  = PL_parser->yychar;
13016
13017     /* Output error message 's' with length 'len'.  'flags' are SV flags that
13018      * apply.  If the number of errors found is large enough, it abandons
13019      * parsing.  If 's' is NULL, there is no message, and it abandons
13020      * processing unconditionally */
13021
13022     if (s != NULL) {
13023         if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
13024             sv_catpvs(where_sv, "at EOF");
13025         else if (   PL_oldoldbufptr
13026                  && PL_bufptr > PL_oldoldbufptr
13027                  && PL_bufptr - PL_oldoldbufptr < 200
13028                  && PL_oldoldbufptr != PL_oldbufptr
13029                  && PL_oldbufptr != PL_bufptr)
13030         {
13031             while (isSPACE(*PL_oldoldbufptr))
13032                 PL_oldoldbufptr++;
13033             context = PL_oldoldbufptr;
13034             contlen = PL_bufptr - PL_oldoldbufptr;
13035         }
13036         else if (  PL_oldbufptr
13037                 && PL_bufptr > PL_oldbufptr
13038                 && PL_bufptr - PL_oldbufptr < 200
13039                 && PL_oldbufptr != PL_bufptr)
13040         {
13041             while (isSPACE(*PL_oldbufptr))
13042                 PL_oldbufptr++;
13043             context = PL_oldbufptr;
13044             contlen = PL_bufptr - PL_oldbufptr;
13045         }
13046         else if (yychar > 255)
13047             sv_catpvs(where_sv, "next token ???");
13048         else if (yychar == YYEMPTY) {
13049             if (PL_lex_state == LEX_NORMAL)
13050                 sv_catpvs(where_sv, "at end of line");
13051             else if (PL_lex_inpat)
13052                 sv_catpvs(where_sv, "within pattern");
13053             else
13054                 sv_catpvs(where_sv, "within string");
13055         }
13056         else {
13057             sv_catpvs(where_sv, "next char ");
13058             if (yychar < 32)
13059                 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13060             else if (isPRINT_LC(yychar)) {
13061                 const char string = yychar;
13062                 sv_catpvn(where_sv, &string, 1);
13063             }
13064             else
13065                 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13066         }
13067         msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
13068         Perl_sv_catpvf(aTHX_ msg, " at %s line %" LINE_Tf ", ",
13069             OutCopFILE(PL_curcop),
13070             (PL_parser->preambling == NOLINE
13071                    ? CopLINE(PL_curcop)
13072                    : PL_parser->preambling));
13073         if (context)
13074             Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
13075                                  UTF8fARG(UTF, contlen, context));
13076         else
13077             Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
13078         if (   PL_multi_start < PL_multi_end
13079             && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
13080         {
13081             Perl_sv_catpvf(aTHX_ msg,
13082             "  (Might be a runaway multi-line %c%c string starting on"
13083             " line %" LINE_Tf ")\n",
13084                     (int)PL_multi_open,(int)PL_multi_close,(line_t)PL_multi_start);
13085             PL_multi_end = 0;
13086         }
13087         if (PL_in_eval & EVAL_WARNONLY) {
13088             PL_in_eval &= ~EVAL_WARNONLY;
13089             Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
13090         }
13091         else {
13092             qerror(msg);
13093         }
13094     }
13095     /* if there was no message then this is a yyquit(), which is actualy handled
13096      * by qerror() with a NULL argument */
13097     if (s == NULL)
13098         qerror(NULL);
13099
13100     PL_in_my = 0;
13101     PL_in_my_stash = NULL;
13102     return 0;
13103 }
13104
13105 STATIC char*
13106 S_swallow_bom(pTHX_ U8 *s)
13107 {
13108     const STRLEN slen = SvCUR(PL_linestr);
13109
13110     PERL_ARGS_ASSERT_SWALLOW_BOM;
13111
13112     switch (s[0]) {
13113     case 0xFF:
13114         if (s[1] == 0xFE) {
13115             /* UTF-16 little-endian? (or UTF-32LE?) */
13116             if (s[2] == 0 && s[3] == 0)  /* UTF-32 little-endian */
13117                 /* diag_listed_as: Unsupported script encoding %s */
13118                 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13119 #ifndef PERL_NO_UTF16_FILTER
13120 #ifdef DEBUGGING
13121             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13122 #endif
13123             s += 2;
13124             if (PL_bufend > (char*)s) {
13125                 s = add_utf16_textfilter(s, TRUE);
13126             }
13127 #else
13128             /* diag_listed_as: Unsupported script encoding %s */
13129             Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13130 #endif
13131         }
13132         break;
13133     case 0xFE:
13134         if (s[1] == 0xFF) {   /* UTF-16 big-endian? */
13135 #ifndef PERL_NO_UTF16_FILTER
13136 #ifdef DEBUGGING
13137             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13138 #endif
13139             s += 2;
13140             if (PL_bufend > (char *)s) {
13141                 s = add_utf16_textfilter(s, FALSE);
13142             }
13143 #else
13144             /* diag_listed_as: Unsupported script encoding %s */
13145             Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13146 #endif
13147         }
13148         break;
13149     case BOM_UTF8_FIRST_BYTE: {
13150         if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
13151 #ifdef DEBUGGING
13152             if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13153 #endif
13154             s += sizeof(BOM_UTF8) - 1;                     /* UTF-8 */
13155         }
13156         break;
13157     }
13158     case 0:
13159         if (slen > 3) {
13160              if (s[1] == 0) {
13161                   if (s[2] == 0xFE && s[3] == 0xFF) {
13162                        /* UTF-32 big-endian */
13163                        /* diag_listed_as: Unsupported script encoding %s */
13164                        Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13165                   }
13166              }
13167              else if (s[2] == 0 && s[3] != 0) {
13168                   /* Leading bytes
13169                    * 00 xx 00 xx
13170                    * are a good indicator of UTF-16BE. */
13171 #ifndef PERL_NO_UTF16_FILTER
13172 #ifdef DEBUGGING
13173                   if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13174 #endif
13175                   s = add_utf16_textfilter(s, FALSE);
13176 #else
13177                   /* diag_listed_as: Unsupported script encoding %s */
13178                   Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13179 #endif
13180              }
13181         }
13182         break;
13183
13184     default:
13185          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13186                   /* Leading bytes
13187                    * xx 00 xx 00
13188                    * are a good indicator of UTF-16LE. */
13189 #ifndef PERL_NO_UTF16_FILTER
13190 #ifdef DEBUGGING
13191               if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13192 #endif
13193               s = add_utf16_textfilter(s, TRUE);
13194 #else
13195               /* diag_listed_as: Unsupported script encoding %s */
13196               Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13197 #endif
13198          }
13199     }
13200     return (char*)s;
13201 }
13202
13203
13204 #ifndef PERL_NO_UTF16_FILTER
13205 static I32
13206 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
13207 {
13208     SV *const filter = FILTER_DATA(idx);
13209     /* We re-use this each time round, throwing the contents away before we
13210        return.  */
13211     SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
13212     SV *const utf8_buffer = filter;
13213     IV status = IoPAGE(filter);
13214     const bool reverse = cBOOL(IoLINES(filter));
13215     I32 retval;
13216
13217     PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13218
13219     /* As we're automatically added, at the lowest level, and hence only called
13220        from this file, we can be sure that we're not called in block mode. Hence
13221        don't bother writing code to deal with block mode.  */
13222     if (maxlen) {
13223         Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13224     }
13225     if (status < 0) {
13226         Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
13227     }
13228     DEBUG_P(PerlIO_printf(Perl_debug_log,
13229                           "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
13230                           FPTR2DPTR(void *, S_utf16_textfilter),
13231                           reverse ? 'l' : 'b', idx, maxlen, status,
13232                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13233
13234     while (1) {
13235         STRLEN chars;
13236         STRLEN have;
13237         Size_t newlen;
13238         U8 *end;
13239         /* First, look in our buffer of existing UTF-8 data:  */
13240         char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13241
13242         if (nl) {
13243             ++nl;
13244         } else if (status == 0) {
13245             /* EOF */
13246             IoPAGE(filter) = 0;
13247             nl = SvEND(utf8_buffer);
13248         }
13249         if (nl) {
13250             STRLEN got = nl - SvPVX(utf8_buffer);
13251             /* Did we have anything to append?  */
13252             retval = got != 0;
13253             sv_catpvn(sv, SvPVX(utf8_buffer), got);
13254             /* Everything else in this code works just fine if SVp_POK isn't
13255                set.  This, however, needs it, and we need it to work, else
13256                we loop infinitely because the buffer is never consumed.  */
13257             sv_chop(utf8_buffer, nl);
13258             break;
13259         }
13260
13261         /* OK, not a complete line there, so need to read some more UTF-16.
13262            Read an extra octect if the buffer currently has an odd number. */
13263         while (1) {
13264             if (status <= 0)
13265                 break;
13266             if (SvCUR(utf16_buffer) >= 2) {
13267                 /* Location of the high octet of the last complete code point.
13268                    Gosh, UTF-16 is a pain. All the benefits of variable length,
13269                    *coupled* with all the benefits of partial reads and
13270                    endianness.  */
13271                 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13272                     + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13273
13274                 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13275                     break;
13276                 }
13277
13278                 /* We have the first half of a surrogate. Read more.  */
13279                 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13280             }
13281
13282             status = FILTER_READ(idx + 1, utf16_buffer,
13283                                  160 + (SvCUR(utf16_buffer) & 1));
13284             DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
13285             DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
13286             if (status < 0) {
13287                 /* Error */
13288                 IoPAGE(filter) = status;
13289                 return status;
13290             }
13291         }
13292
13293         /* 'chars' isn't quite the right name, as code points above 0xFFFF
13294          * require 4 bytes per char */
13295         chars = SvCUR(utf16_buffer) >> 1;
13296         have = SvCUR(utf8_buffer);
13297
13298         /* Assume the worst case size as noted by the functions: twice the
13299          * number of input bytes */
13300         SvGROW(utf8_buffer, have + chars * 4 + 1);
13301
13302         if (reverse) {
13303             end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13304                                          (U8*)SvPVX_const(utf8_buffer) + have,
13305                                          chars * 2, &newlen);
13306         } else {
13307             end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
13308                                 (U8*)SvPVX_const(utf8_buffer) + have,
13309                                 chars * 2, &newlen);
13310         }
13311         SvCUR_set(utf8_buffer, have + newlen);
13312         *end = '\0';
13313
13314         /* No need to keep this SV "well-formed" with a '\0' after the end, as
13315            it's private to us, and utf16_to_utf8{,reversed} take a
13316            (pointer,length) pair, rather than a NUL-terminated string.  */
13317         if(SvCUR(utf16_buffer) & 1) {
13318             *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13319             SvCUR_set(utf16_buffer, 1);
13320         } else {
13321             SvCUR_set(utf16_buffer, 0);
13322         }
13323     }
13324     DEBUG_P(PerlIO_printf(Perl_debug_log,
13325                           "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
13326                           status,
13327                           (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13328     DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
13329     return retval;
13330 }
13331
13332 static U8 *
13333 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13334 {
13335     SV *filter = filter_add(S_utf16_textfilter, NULL);
13336
13337     PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13338
13339     IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
13340     SvPVCLEAR(filter);
13341     IoLINES(filter) = reversed;
13342     IoPAGE(filter) = 1; /* Not EOF */
13343
13344     /* Sadly, we have to return a valid pointer, come what may, so we have to
13345        ignore any error return from this.  */
13346     SvCUR_set(PL_linestr, 0);
13347     if (FILTER_READ(0, PL_linestr, 0)) {
13348         SvUTF8_on(PL_linestr);
13349     } else {
13350         SvUTF8_on(PL_linestr);
13351     }
13352     PL_bufend = SvEND(PL_linestr);
13353     return (U8*)SvPVX(PL_linestr);
13354 }
13355 #endif
13356
13357 /*
13358 =for apidoc scan_vstring
13359
13360 Returns a pointer to the next character after the parsed
13361 vstring, as well as updating the passed in sv.
13362
13363 Function must be called like
13364
13365         sv = sv_2mortal(newSV(5));
13366         s = scan_vstring(s,e,sv);
13367
13368 where s and e are the start and end of the string.
13369 The sv should already be large enough to store the vstring
13370 passed in, for performance reasons.
13371
13372 This function may croak if fatal warnings are enabled in the
13373 calling scope, hence the sv_2mortal in the example (to prevent
13374 a leak).  Make sure to do SvREFCNT_inc afterwards if you use
13375 sv_2mortal.
13376
13377 =cut
13378 */
13379
13380 char *
13381 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
13382 {
13383     const char *pos = s;
13384     const char *start = s;
13385
13386     PERL_ARGS_ASSERT_SCAN_VSTRING;
13387
13388     if (*pos == 'v') pos++;  /* get past 'v' */
13389     while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13390         pos++;
13391     if ( *pos != '.') {
13392         /* this may not be a v-string if followed by => */
13393         const char *next = pos;
13394         while (next < e && isSPACE(*next))
13395             ++next;
13396         if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
13397             /* return string not v-string */
13398             sv_setpvn(sv,(char *)s,pos-s);
13399             return (char *)pos;
13400         }
13401     }
13402
13403     if (!isALPHA(*pos)) {
13404         U8 tmpbuf[UTF8_MAXBYTES+1];
13405
13406         if (*s == 'v')
13407             s++;  /* get past 'v' */
13408
13409         SvPVCLEAR(sv);
13410
13411         for (;;) {
13412             /* this is atoi() that tolerates underscores */
13413             U8 *tmpend;
13414             UV rev = 0;
13415             const char *end = pos;
13416             UV mult = 1;
13417             while (--end >= s) {
13418                 if (*end != '_') {
13419                     const UV orev = rev;
13420                     rev += (*end - '0') * mult;
13421                     mult *= 10;
13422                     if (orev > rev)
13423                         /* diag_listed_as: Integer overflow in %s number */
13424                         Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13425                                          "Integer overflow in decimal number");
13426                 }
13427             }
13428
13429             /* Append native character for the rev point */
13430             tmpend = uvchr_to_utf8(tmpbuf, rev);
13431             sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13432             if (!UVCHR_IS_INVARIANT(rev))
13433                  SvUTF8_on(sv);
13434             if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13435                  s = ++pos;
13436             else {
13437                  s = pos;
13438                  break;
13439             }
13440             while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13441                  pos++;
13442         }
13443         SvPOK_on(sv);
13444         sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13445         SvRMAGICAL_on(sv);
13446     }
13447     return (char *)s;
13448 }
13449
13450 int
13451 Perl_keyword_plugin_standard(pTHX_
13452         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13453 {
13454     PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13455     PERL_UNUSED_CONTEXT;
13456     PERL_UNUSED_ARG(keyword_ptr);
13457     PERL_UNUSED_ARG(keyword_len);
13458     PERL_UNUSED_ARG(op_ptr);
13459     return KEYWORD_PLUGIN_DECLINE;
13460 }
13461
13462 STRLEN
13463 Perl_infix_plugin_standard(pTHX_
13464         char *operator_ptr, STRLEN operator_len, struct Perl_custom_infix **def)
13465 {
13466     PERL_ARGS_ASSERT_INFIX_PLUGIN_STANDARD;
13467     PERL_UNUSED_CONTEXT;
13468     PERL_UNUSED_ARG(operator_ptr);
13469     PERL_UNUSED_ARG(operator_len);
13470     PERL_UNUSED_ARG(def);
13471     return 0;
13472 }
13473
13474 /*
13475 =for apidoc_section $lexer
13476 =for apidoc wrap_keyword_plugin
13477
13478 Puts a C function into the chain of keyword plugins.  This is the
13479 preferred way to manipulate the L</PL_keyword_plugin> variable.
13480 C<new_plugin> is a pointer to the C function that is to be added to the
13481 keyword plugin chain, and C<old_plugin_p> points to the storage location
13482 where a pointer to the next function in the chain will be stored.  The
13483 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
13484 while the value previously stored there is written to C<*old_plugin_p>.
13485
13486 L</PL_keyword_plugin> is global to an entire process, and a module wishing
13487 to hook keyword parsing may find itself invoked more than once per
13488 process, typically in different threads.  To handle that situation, this
13489 function is idempotent.  The location C<*old_plugin_p> must initially
13490 (once per process) contain a null pointer.  A C variable of static
13491 duration (declared at file scope, typically also marked C<static> to give
13492 it internal linkage) will be implicitly initialised appropriately, if it
13493 does not have an explicit initialiser.  This function will only actually
13494 modify the plugin chain if it finds C<*old_plugin_p> to be null.  This
13495 function is also thread safe on the small scale.  It uses appropriate
13496 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
13497
13498 When this function is called, the function referenced by C<new_plugin>
13499 must be ready to be called, except for C<*old_plugin_p> being unfilled.
13500 In a threading situation, C<new_plugin> may be called immediately, even
13501 before this function has returned.  C<*old_plugin_p> will always be
13502 appropriately set before C<new_plugin> is called.  If C<new_plugin>
13503 decides not to do anything special with the identifier that it is given
13504 (which is the usual case for most calls to a keyword plugin), it must
13505 chain the plugin function referenced by C<*old_plugin_p>.
13506
13507 Taken all together, XS code to install a keyword plugin should typically
13508 look something like this:
13509
13510     static Perl_keyword_plugin_t next_keyword_plugin;
13511     static OP *my_keyword_plugin(pTHX_
13512         char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13513     {
13514         if (memEQs(keyword_ptr, keyword_len,
13515                    "my_new_keyword")) {
13516             ...
13517         } else {
13518             return next_keyword_plugin(aTHX_
13519                 keyword_ptr, keyword_len, op_ptr);
13520         }
13521     }
13522     BOOT:
13523         wrap_keyword_plugin(my_keyword_plugin,
13524                             &next_keyword_plugin);
13525
13526 Direct access to L</PL_keyword_plugin> should be avoided.
13527
13528 =cut
13529 */
13530
13531 void
13532 Perl_wrap_keyword_plugin(pTHX_
13533     Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
13534 {
13535
13536     PERL_UNUSED_CONTEXT;
13537     PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
13538     if (*old_plugin_p) return;
13539     KEYWORD_PLUGIN_MUTEX_LOCK;
13540     if (!*old_plugin_p) {
13541         *old_plugin_p = PL_keyword_plugin;
13542         PL_keyword_plugin = new_plugin;
13543     }
13544     KEYWORD_PLUGIN_MUTEX_UNLOCK;
13545 }
13546
13547 /*
13548 =for apidoc wrap_infix_plugin
13549
13550 B<NOTE:> This API exists entirely for the purpose of making the CPAN module
13551 C<XS::Parse::Infix> work. It is not expected that additional modules will make
13552 use of it; rather, that they should use C<XS::Parse::Infix> to provide parsing
13553 of new infix operators.
13554
13555 Puts a C function into the chain of infix plugins.  This is the preferred
13556 way to manipulate the L</PL_infix_plugin> variable.  C<new_plugin> is a
13557 pointer to the C function that is to be added to the infix plugin chain, and
13558 C<old_plugin_p> points to a storage location where a pointer to the next
13559 function in the chain will be stored.  The value of C<new_plugin> is written
13560 into the L</PL_infix_plugin> variable, while the value previously stored there
13561 is written to C<*old_plugin_p>.
13562
13563 Direct access to L</PL_infix_plugin> should be avoided.
13564
13565 =cut
13566 */
13567
13568 void
13569 Perl_wrap_infix_plugin(pTHX_
13570     Perl_infix_plugin_t new_plugin, Perl_infix_plugin_t *old_plugin_p)
13571 {
13572
13573     PERL_UNUSED_CONTEXT;
13574     PERL_ARGS_ASSERT_WRAP_INFIX_PLUGIN;
13575     if (*old_plugin_p) return;
13576     /* We use the same mutex as for PL_keyword_plugin as it's so rare either
13577      * of them is actually updated; no need for a dedicated one each */
13578     KEYWORD_PLUGIN_MUTEX_LOCK;
13579     if (!*old_plugin_p) {
13580         *old_plugin_p = PL_infix_plugin;
13581         PL_infix_plugin = new_plugin;
13582     }
13583     KEYWORD_PLUGIN_MUTEX_UNLOCK;
13584 }
13585
13586 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
13587 static void
13588 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
13589 {
13590     SAVEI32(PL_lex_brackets);
13591     if (PL_lex_brackets > 100)
13592         Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
13593     PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
13594     SAVEI32(PL_lex_allbrackets);
13595     PL_lex_allbrackets = 0;
13596     SAVEI8(PL_lex_fakeeof);
13597     PL_lex_fakeeof = (U8)fakeeof;
13598     if(yyparse(gramtype) && !PL_parser->error_count)
13599         qerror(Perl_mess(aTHX_ "Parse error"));
13600 }
13601
13602 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
13603 static OP *
13604 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
13605 {
13606     OP *o;
13607     ENTER;
13608     SAVEVPTR(PL_eval_root);
13609     PL_eval_root = NULL;
13610     parse_recdescent(gramtype, fakeeof);
13611     o = PL_eval_root;
13612     LEAVE;
13613     return o;
13614 }
13615
13616 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
13617 static OP *
13618 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
13619 {
13620     OP *exprop;
13621     if (flags & ~PARSE_OPTIONAL)
13622         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
13623     exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
13624     if (!exprop && !(flags & PARSE_OPTIONAL)) {
13625         if (!PL_parser->error_count)
13626             qerror(Perl_mess(aTHX_ "Parse error"));
13627         exprop = newOP(OP_NULL, 0);
13628     }
13629     return exprop;
13630 }
13631
13632 /*
13633 =for apidoc parse_arithexpr
13634
13635 Parse a Perl arithmetic expression.  This may contain operators of precedence
13636 down to the bit shift operators.  The expression must be followed (and thus
13637 terminated) either by a comparison or lower-precedence operator or by
13638 something that would normally terminate an expression such as semicolon.
13639 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13640 otherwise it is mandatory.  It is up to the caller to ensure that the
13641 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13642 the source of the code to be parsed and the lexical context for the
13643 expression.
13644
13645 The op tree representing the expression is returned.  If an optional
13646 expression is absent, a null pointer is returned, otherwise the pointer
13647 will be non-null.
13648
13649 If an error occurs in parsing or compilation, in most cases a valid op
13650 tree is returned anyway.  The error is reflected in the parser state,
13651 normally resulting in a single exception at the top level of parsing
13652 which covers all the compilation errors that occurred.  Some compilation
13653 errors, however, will throw an exception immediately.
13654
13655 =for apidoc Amnh||PARSE_OPTIONAL
13656
13657 =cut
13658
13659 */
13660
13661 OP *
13662 Perl_parse_arithexpr(pTHX_ U32 flags)
13663 {
13664     return parse_expr(LEX_FAKEEOF_COMPARE, flags);
13665 }
13666
13667 /*
13668 =for apidoc parse_termexpr
13669
13670 Parse a Perl term expression.  This may contain operators of precedence
13671 down to the assignment operators.  The expression must be followed (and thus
13672 terminated) either by a comma or lower-precedence operator or by
13673 something that would normally terminate an expression such as semicolon.
13674 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13675 otherwise it is mandatory.  It is up to the caller to ensure that the
13676 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13677 the source of the code to be parsed and the lexical context for the
13678 expression.
13679
13680 The op tree representing the expression is returned.  If an optional
13681 expression is absent, a null pointer is returned, otherwise the pointer
13682 will be non-null.
13683
13684 If an error occurs in parsing or compilation, in most cases a valid op
13685 tree is returned anyway.  The error is reflected in the parser state,
13686 normally resulting in a single exception at the top level of parsing
13687 which covers all the compilation errors that occurred.  Some compilation
13688 errors, however, will throw an exception immediately.
13689
13690 =cut
13691 */
13692
13693 OP *
13694 Perl_parse_termexpr(pTHX_ U32 flags)
13695 {
13696     return parse_expr(LEX_FAKEEOF_COMMA, flags);
13697 }
13698
13699 /*
13700 =for apidoc parse_listexpr
13701
13702 Parse a Perl list expression.  This may contain operators of precedence
13703 down to the comma operator.  The expression must be followed (and thus
13704 terminated) either by a low-precedence logic operator such as C<or> or by
13705 something that would normally terminate an expression such as semicolon.
13706 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13707 otherwise it is mandatory.  It is up to the caller to ensure that the
13708 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13709 the source of the code to be parsed and the lexical context for the
13710 expression.
13711
13712 The op tree representing the expression is returned.  If an optional
13713 expression is absent, a null pointer is returned, otherwise the pointer
13714 will be non-null.
13715
13716 If an error occurs in parsing or compilation, in most cases a valid op
13717 tree is returned anyway.  The error is reflected in the parser state,
13718 normally resulting in a single exception at the top level of parsing
13719 which covers all the compilation errors that occurred.  Some compilation
13720 errors, however, will throw an exception immediately.
13721
13722 =cut
13723 */
13724
13725 OP *
13726 Perl_parse_listexpr(pTHX_ U32 flags)
13727 {
13728     return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
13729 }
13730
13731 /*
13732 =for apidoc parse_fullexpr
13733
13734 Parse a single complete Perl expression.  This allows the full
13735 expression grammar, including the lowest-precedence operators such
13736 as C<or>.  The expression must be followed (and thus terminated) by a
13737 token that an expression would normally be terminated by: end-of-file,
13738 closing bracketing punctuation, semicolon, or one of the keywords that
13739 signals a postfix expression-statement modifier.  If C<flags> has the
13740 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
13741 mandatory.  It is up to the caller to ensure that the dynamic parser
13742 state (L</PL_parser> et al) is correctly set to reflect the source of
13743 the code to be parsed and the lexical context for the expression.
13744
13745 The op tree representing the expression is returned.  If an optional
13746 expression is absent, a null pointer is returned, otherwise the pointer
13747 will be non-null.
13748
13749 If an error occurs in parsing or compilation, in most cases a valid op
13750 tree is returned anyway.  The error is reflected in the parser state,
13751 normally resulting in a single exception at the top level of parsing
13752 which covers all the compilation errors that occurred.  Some compilation
13753 errors, however, will throw an exception immediately.
13754
13755 =cut
13756 */
13757
13758 OP *
13759 Perl_parse_fullexpr(pTHX_ U32 flags)
13760 {
13761     return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13762 }
13763
13764 /*
13765 =for apidoc parse_block
13766
13767 Parse a single complete Perl code block.  This consists of an opening
13768 brace, a sequence of statements, and a closing brace.  The block
13769 constitutes a lexical scope, so C<my> variables and various compile-time
13770 effects can be contained within it.  It is up to the caller to ensure
13771 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13772 reflect the source of the code to be parsed and the lexical context for
13773 the statement.
13774
13775 The op tree representing the code block is returned.  This is always a
13776 real op, never a null pointer.  It will normally be a C<lineseq> list,
13777 including C<nextstate> or equivalent ops.  No ops to construct any kind
13778 of runtime scope are included by virtue of it being a block.
13779
13780 If an error occurs in parsing or compilation, in most cases a valid op
13781 tree (most likely null) is returned anyway.  The error is reflected in
13782 the parser state, normally resulting in a single exception at the top
13783 level of parsing which covers all the compilation errors that occurred.
13784 Some compilation errors, however, will throw an exception immediately.
13785
13786 The C<flags> parameter is reserved for future use, and must always
13787 be zero.
13788
13789 =cut
13790 */
13791
13792 OP *
13793 Perl_parse_block(pTHX_ U32 flags)
13794 {
13795     if (flags)
13796         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13797     return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13798 }
13799
13800 /*
13801 =for apidoc parse_barestmt
13802
13803 Parse a single unadorned Perl statement.  This may be a normal imperative
13804 statement or a declaration that has compile-time effect.  It does not
13805 include any label or other affixture.  It is up to the caller to ensure
13806 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13807 reflect the source of the code to be parsed and the lexical context for
13808 the statement.
13809
13810 The op tree representing the statement is returned.  This may be a
13811 null pointer if the statement is null, for example if it was actually
13812 a subroutine definition (which has compile-time side effects).  If not
13813 null, it will be ops directly implementing the statement, suitable to
13814 pass to L</newSTATEOP>.  It will not normally include a C<nextstate> or
13815 equivalent op (except for those embedded in a scope contained entirely
13816 within the statement).
13817
13818 If an error occurs in parsing or compilation, in most cases a valid op
13819 tree (most likely null) is returned anyway.  The error is reflected in
13820 the parser state, normally resulting in a single exception at the top
13821 level of parsing which covers all the compilation errors that occurred.
13822 Some compilation errors, however, will throw an exception immediately.
13823
13824 The C<flags> parameter is reserved for future use, and must always
13825 be zero.
13826
13827 =cut
13828 */
13829
13830 OP *
13831 Perl_parse_barestmt(pTHX_ U32 flags)
13832 {
13833     if (flags)
13834         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13835     return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13836 }
13837
13838 /*
13839 =for apidoc parse_label
13840
13841 Parse a single label, possibly optional, of the type that may prefix a
13842 Perl statement.  It is up to the caller to ensure that the dynamic parser
13843 state (L</PL_parser> et al) is correctly set to reflect the source of
13844 the code to be parsed.  If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13845 label is optional, otherwise it is mandatory.
13846
13847 The name of the label is returned in the form of a fresh scalar.  If an
13848 optional label is absent, a null pointer is returned.
13849
13850 If an error occurs in parsing, which can only occur if the label is
13851 mandatory, a valid label is returned anyway.  The error is reflected in
13852 the parser state, normally resulting in a single exception at the top
13853 level of parsing which covers all the compilation errors that occurred.
13854
13855 =cut
13856 */
13857
13858 SV *
13859 Perl_parse_label(pTHX_ U32 flags)
13860 {
13861     if (flags & ~PARSE_OPTIONAL)
13862         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13863     if (PL_nexttoke) {
13864         PL_parser->yychar = yylex();
13865         if (PL_parser->yychar == LABEL) {
13866             SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13867             PL_parser->yychar = YYEMPTY;
13868             cSVOPx(pl_yylval.opval)->op_sv = NULL;
13869             op_free(pl_yylval.opval);
13870             return labelsv;
13871         } else {
13872             yyunlex();
13873             goto no_label;
13874         }
13875     } else {
13876         char *s, *t;
13877         STRLEN wlen, bufptr_pos;
13878         lex_read_space(0);
13879         t = s = PL_bufptr;
13880         if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13881             goto no_label;
13882         t = scan_word6(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen, FALSE);
13883         if (word_takes_any_delimiter(s, wlen))
13884             goto no_label;
13885         bufptr_pos = s - SvPVX(PL_linestr);
13886         PL_bufptr = t;
13887         lex_read_space(LEX_KEEP_PREVIOUS);
13888         t = PL_bufptr;
13889         s = SvPVX(PL_linestr) + bufptr_pos;
13890         if (t[0] == ':' && t[1] != ':') {
13891             PL_oldoldbufptr = PL_oldbufptr;
13892             PL_oldbufptr = s;
13893             PL_bufptr = t+1;
13894             return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13895         } else {
13896             PL_bufptr = s;
13897             no_label:
13898             if (flags & PARSE_OPTIONAL) {
13899                 return NULL;
13900             } else {
13901                 qerror(Perl_mess(aTHX_ "Parse error"));
13902                 return newSVpvs("x");
13903             }
13904         }
13905     }
13906 }
13907
13908 /*
13909 =for apidoc parse_fullstmt
13910
13911 Parse a single complete Perl statement.  This may be a normal imperative
13912 statement or a declaration that has compile-time effect, and may include
13913 optional labels.  It is up to the caller to ensure that the dynamic
13914 parser state (L</PL_parser> et al) is correctly set to reflect the source
13915 of the code to be parsed and the lexical context for the statement.
13916
13917 The op tree representing the statement is returned.  This may be a
13918 null pointer if the statement is null, for example if it was actually
13919 a subroutine definition (which has compile-time side effects).  If not
13920 null, it will be the result of a L</newSTATEOP> call, normally including
13921 a C<nextstate> or equivalent op.
13922
13923 If an error occurs in parsing or compilation, in most cases a valid op
13924 tree (most likely null) is returned anyway.  The error is reflected in
13925 the parser state, normally resulting in a single exception at the top
13926 level of parsing which covers all the compilation errors that occurred.
13927 Some compilation errors, however, will throw an exception immediately.
13928
13929 The C<flags> parameter is reserved for future use, and must always
13930 be zero.
13931
13932 =cut
13933 */
13934
13935 OP *
13936 Perl_parse_fullstmt(pTHX_ U32 flags)
13937 {
13938     if (flags)
13939         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13940     return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13941 }
13942
13943 /*
13944 =for apidoc parse_stmtseq
13945
13946 Parse a sequence of zero or more Perl statements.  These may be normal
13947 imperative statements, including optional labels, or declarations
13948 that have compile-time effect, or any mixture thereof.  The statement
13949 sequence ends when a closing brace or end-of-file is encountered in a
13950 place where a new statement could have validly started.  It is up to
13951 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13952 is correctly set to reflect the source of the code to be parsed and the
13953 lexical context for the statements.
13954
13955 The op tree representing the statement sequence is returned.  This may
13956 be a null pointer if the statements were all null, for example if there
13957 were no statements or if there were only subroutine definitions (which
13958 have compile-time side effects).  If not null, it will be a C<lineseq>
13959 list, normally including C<nextstate> or equivalent ops.
13960
13961 If an error occurs in parsing or compilation, in most cases a valid op
13962 tree is returned anyway.  The error is reflected in the parser state,
13963 normally resulting in a single exception at the top level of parsing
13964 which covers all the compilation errors that occurred.  Some compilation
13965 errors, however, will throw an exception immediately.
13966
13967 The C<flags> parameter is reserved for future use, and must always
13968 be zero.
13969
13970 =cut
13971 */
13972
13973 OP *
13974 Perl_parse_stmtseq(pTHX_ U32 flags)
13975 {
13976     OP *stmtseqop;
13977     I32 c;
13978     if (flags)
13979         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13980     stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13981     c = lex_peek_unichar(0);
13982     if (c != -1 && c != /*{*/'}')
13983         qerror(Perl_mess(aTHX_ "Parse error"));
13984     return stmtseqop;
13985 }
13986
13987 /*
13988 =for apidoc parse_subsignature
13989
13990 Parse a subroutine signature declaration. This is the contents of the
13991 parentheses following a named or anonymous subroutine declaration when the
13992 C<signatures> feature is enabled. Note that this function neither expects
13993 nor consumes the opening and closing parentheses around the signature; it
13994 is the caller's job to handle these.
13995
13996 This function must only be called during parsing of a subroutine; after
13997 L</start_subparse> has been called. It might allocate lexical variables on
13998 the pad for the current subroutine.
13999
14000 The op tree to unpack the arguments from the stack at runtime is returned.
14001 This op tree should appear at the beginning of the compiled function. The
14002 caller may wish to use L</op_append_list> to build their function body
14003 after it, or splice it together with the body before calling L</newATTRSUB>.
14004
14005 The C<flags> parameter is reserved for future use, and must always
14006 be zero.
14007
14008 =cut
14009 */
14010
14011 OP *
14012 Perl_parse_subsignature(pTHX_ U32 flags)
14013 {
14014     if (flags)
14015         Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
14016     return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
14017 }
14018
14019 /*
14020  * ex: set ts=8 sts=4 sw=4 et:
14021  */