#define PL_lex_casestack (PL_parser->lex_casestack)
#define PL_lex_defer (PL_parser->lex_defer)
#define PL_lex_dojoin (PL_parser->lex_dojoin)
-#define PL_lex_expect (PL_parser->lex_expect)
#define PL_lex_formbrack (PL_parser->lex_formbrack)
#define PL_lex_inpat (PL_parser->lex_inpat)
#define PL_lex_inwhat (PL_parser->lex_inwhat)
#define XFAKEBRACK 0x80
#ifdef USE_UTF8_SCRIPTS
-# define UTF (!IN_BYTES)
+# define UTF cBOOL(!IN_BYTES)
#else
-# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
+# define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
#endif
/* The maximum number of characters preceding the unrecognized one to display */
#define SPACE_OR_TAB(c) isBLANK_A(c)
+#define HEXFP_PEEK(s) \
+ (((s[0] == '.') && \
+ (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
+ isALPHA_FOLD_EQ(s[0], 'p'))
+
/* LEX_* are values for PL_lex_state, the state of the lexer.
* They are arranged oddly so that the guard on the switch statement
* can get by with a single comparison (if the compiler is smart enough).
#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
-# define SKIPSPACE0(s) skipspace(s)
-# define SKIPSPACE1(s) skipspace(s)
-# define SKIPSPACE2(s,tsv) skipspace(s)
-# define PEEKSPACE(s) skipspace(s)
-
/*
* Convenience functions to return different tokens and prime the
* lexer for the next token. They all take an argument.
* PWop : power operator
* PMop : pattern-matching operator
* Aop : addition-level operator
+ * AopNOASSIGN : addition-level operator that is never part of .=
* Mop : multiplication-level operator
* Eop : equality-testing operator
* Rop : relational operator <= != gt
#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
-#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
+#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
#define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
-#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
+#define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \
+ pl_yylval.ival=f, \
+ PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
+ REPORT((int)LOOPEX))
#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
-#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
-#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
-#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
-#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
+#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
+#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
+#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
+#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
-#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
-#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
+#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
+#define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
+#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
PL_last_lop_op = f; \
if (*s == '(') \
return REPORT( (int)FUNC1 ); \
- s = PEEKSPACE(s); \
+ s = skipspace(s); \
return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
}
#define UNI(f) UNI3(f,XTERM,1)
STATIC int
S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
{
- dVAR;
-
PERL_ARGS_ASSERT_TOKEREPORT;
if (DEBUG_T_TEST) {
}
if (name)
Perl_sv_catpv(aTHX_ report, name);
- else if ((char)rv > ' ' && (char)rv <= '~')
+ else if (isGRAPH(rv))
{
Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
if ((char)rv == 'p')
/*
* S_ao
*
- * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
- * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
+ * This subroutine looks for an '=' next to the operator that has just been
+ * parsed and turns it into an ASSIGNOP if it finds one.
*/
STATIC int
S_ao(pTHX_ int toketype)
{
- dVAR;
if (*PL_bufptr == '=') {
PL_bufptr++;
if (toketype == ANDAND)
pl_yylval.ival = OP_DORASSIGN;
toketype = ASSIGNOP;
}
- return toketype;
+ return REPORT(toketype);
}
/*
STATIC void
S_no_op(pTHX_ const char *const what, char *s)
{
- dVAR;
char * const oldbp = PL_bufptr;
const bool is_first = (PL_oldbufptr == PL_linestart);
STATIC void
S_missingterm(pTHX_ char *s)
{
- dVAR;
char tmpbuf[3];
char q;
if (s) {
bool
Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
{
- dVAR;
char he_name[8 + MAX_FEATURE_LEN] = "feature_";
PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
void
Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
{
- dVAR;
const char *s = NULL;
yy_parser *parser, *oparser;
if (flags && flags & ~LEX_START_FLAGS)
parser->bufend = parser->bufptr + SvCUR(parser->linestr);
parser->last_lop = parser->last_uni = NULL;
- assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
+ STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
|LEX_DONT_CLOSE_RSFP));
parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
|LEX_DONT_CLOSE_RSFP));
STATIC void
S_incline(pTHX_ const char *s)
{
- dVAR;
const char *t;
const char *n;
const char *e;
if (*e != '\n' && *e != '\0')
return; /* false alarm */
- line_num = atoi(n)-1;
+ line_num = grok_atou(n, &e) - 1;
if (t - s > 0) {
const STRLEN len = t - s;
STATIC void
S_check_uni(pTHX)
{
- dVAR;
const char *s;
const char *t;
/*
* S_lop
* Build a list operator (or something that might be one). The rules:
- * - if we have a next token, then it's a list operator [why?]
+ * - if we have a next token, then it's a list operator (no parens) for
+ * which the next token has already been parsed; e.g.,
+ * sort foo @args
+ * sort foo (@args)
* - if the next thing is an opening paren, then it's a function
* - else it's a list operator
*/
STATIC I32
S_lop(pTHX_ I32 f, int x, char *s)
{
- dVAR;
-
PERL_ARGS_ASSERT_LOP;
pl_yylval.ival = f;
CLINE;
- PL_expect = x;
PL_bufptr = s;
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = (OPCODE)f;
if (PL_nexttoke)
goto lstop;
+ PL_expect = x;
if (*s == '(')
return REPORT(FUNC);
- s = PEEKSPACE(s);
+ s = skipspace(s);
if (*s == '(')
return REPORT(FUNC);
else {
STATIC void
S_force_next(pTHX_ I32 type)
{
- dVAR;
#ifdef DEBUGGING
if (DEBUG_T_TEST) {
PerlIO_printf(Perl_debug_log, "### forced token:\n");
PL_nexttoke++;
if (PL_lex_state != LEX_KNOWNEXT) {
PL_lex_defer = PL_lex_state;
- PL_lex_expect = PL_expect;
PL_lex_state = LEX_KNOWNEXT;
}
}
static int
S_postderef(pTHX_ int const funny, char const next)
{
- dVAR;
assert(funny == DOLSHARP || strchr("$@%&*", funny));
assert(strchr("*[{", next));
if (next == '*') {
STATIC SV *
S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
{
- dVAR;
SV * const sv = newSVpvn_utf8(start, len,
!IN_BYTES
&& UTF
- && !is_ascii_string((const U8*)start, len)
+ && !is_invariant_string((const U8*)start, len)
&& is_utf8_string((const U8*)start, len));
return sv;
}
* a keyword (do this if the word is a label, e.g. goto FOO)
* int allow_pack : if true, : characters will also be allowed (require,
* use, etc. do this)
- * int allow_initial_tick : used by the "sub" lexer only.
*/
STATIC char *
S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
{
- dVAR;
char *s;
STRLEN len;
PERL_ARGS_ASSERT_FORCE_WORD;
- start = SKIPSPACE1(start);
+ start = skipspace(start);
s = start;
if (isIDFIRST_lazy_if(s,UTF) ||
(allow_pack && *s == ':') )
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
if (check_keyword) {
char *s2 = PL_tokenbuf;
+ STRLEN len2 = len;
if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
- s2 += 6, len -= 6;
- if (keyword(s2, len, 0))
+ s2 += 6, len2 -= 6;
+ if (keyword(s2, len2, 0))
return start;
}
if (token == METHOD) {
- s = SKIPSPACE1(s);
+ s = skipspace(s);
if (*s == '(')
PL_expect = XTERM;
else {
STATIC void
S_force_ident(pTHX_ const char *s, int kind)
{
- dVAR;
-
PERL_ARGS_ASSERT_FORCE_IDENT;
if (s[0]) {
warnings if the symbol must be introduced in an eval.
GSAR 96-10-12 */
gv_fetchpvn_flags(s, len,
- (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
+ (PL_in_eval ? GV_ADDMULTI
: GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
kind == '$' ? SVt_PV :
kind == '@' ? SVt_PVAV :
STATIC char *
S_force_version(pTHX_ char *s, int guessing)
{
- dVAR;
OP *version = NULL;
char *d;
PERL_ARGS_ASSERT_FORCE_VERSION;
- s = SKIPSPACE1(s);
+ s = skipspace(s);
d = s;
if (*d == 'v')
STATIC char *
S_force_strict_version(pTHX_ char *s)
{
- dVAR;
OP *version = NULL;
const char *errstr = NULL;
version = newSVOP(OP_CONST, 0, ver);
}
else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
- (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
+ (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
{
PL_bufptr = s;
if (errstr)
STATIC SV *
S_tokeq(pTHX_ SV *sv)
{
- dVAR;
char *s;
char *send;
char *d;
STATIC I32
S_sublex_start(pTHX)
{
- dVAR;
const I32 op_type = pl_yylval.ival;
if (op_type == OP_NULL) {
STATIC I32
S_sublex_push(pTHX)
{
- dVAR;
LEXSHARED *shared;
const bool is_heredoc = PL_multi_close == '<';
ENTER;
STATIC I32
S_sublex_done(pTHX)
{
- dVAR;
if (!PL_lex_starts++) {
SV * const sv = newSVpvs("");
if (SvUTF8(PL_linestr))
STATIC char *
S_scan_const(pTHX_ char *start)
{
- dVAR;
char *send = PL_bufend; /* end of the constant */
- SV *sv = newSV(send - start); /* sv for the constant. See
- note below on sizing. */
+ SV *sv = newSV(send - start); /* sv for the constant. See note below
+ on sizing. */
char *s = start; /* start of the constant */
char *d = SvPVX(sv); /* destination for copies */
- bool dorange = FALSE; /* are we in a translit range? */
- bool didrange = FALSE; /* did we just finish a range? */
- bool in_charclass = FALSE; /* within /[...]/ */
- bool has_utf8 = FALSE; /* Output constant is UTF8 */
- bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
- to be UTF8? But, this can
- show as true when the source
- isn't utf8, as for example
- when it is entirely composed
- of hex constants */
+ bool dorange = FALSE; /* are we in a translit range? */
+ bool didrange = FALSE; /* did we just finish a range? */
+ bool in_charclass = FALSE; /* within /[...]/ */
+ bool has_utf8 = FALSE; /* Output constant is UTF8 */
+ bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
+ UTF8? But, this can show as true
+ when the source isn't utf8, as for
+ example when it is entirely composed
+ of hex constants */
SV *res; /* result from charnames */
/* Note on sizing: The scanned constant is placed into sv, which is
i = d - SvPVX_const(sv); /* remember current offset */
#ifdef EBCDIC
SvGROW(sv,
- SvLEN(sv) + (has_utf8 ?
- (512 - UTF_CONTINUATION_MARK +
- UNISKIP(0x100))
+ SvLEN(sv) + ((has_utf8)
+ ? (512 - UTF_CONTINUATION_MARK
+ + UNISKIP(0x100))
: 256));
/* How many two-byte within 0..255: 128 in UTF-8,
* 96 in UTF-8-mod. */
}
#ifdef EBCDIC
+ /* Because of the discontinuities in EBCDIC A-Z and a-z, expand
+ * any subsets of these ranges into individual characters */
if (literal_endpoint == 2 &&
((isLOWER_A(min) && isLOWER_A(max)) ||
(isUPPER_A(min) && isUPPER_A(max))))
* symbol meaning, e.g. \x{2E} would be confused with a dot. But
* in spite of this, we do have to process \N here while the proper
* charnames handler is in scope. See bugs #56444 and #62056.
+ *
* There is a complication because \N in a pattern may also stand
* for 'match a non-nl', and not mean a charname, in which case its
* processing should be deferred to the regex compiler. To be a
SvPOK_on(sv);
*d = '\0';
/* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- UNISKIP(uv) + (STRLEN)(send - s) + 1);
+ sv_utf8_upgrade_flags_grow(
+ sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
+ /* Above-latin1 in string
+ * implies no encoding */
+ |SV_UTF8_NO_ENCODING,
+ UNISKIP(uv) + (STRLEN)(send - s) + 1);
d = SvPVX(sv) + SvCUR(sv);
has_utf8 = TRUE;
}
continue;
case 'N':
- /* In a non-pattern \N must be a named character, like \N{LATIN
- * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
- * mean to match a non-newline. For non-patterns, named
- * characters are converted to their string equivalents. In
- * patterns, named characters are not converted to their
- * ultimate forms for the same reasons that other escapes
- * aren't. Instead, they are converted to the \N{U+...} form
- * to get the value from the charnames that is in effect right
- * now, while preserving the fact that it was a named character
- * so that the regex compiler knows this */
-
- /* The structure of this section of code (besides checking for
+ /* In a non-pattern \N must be like \N{U+0041}, or it can be a
+ * named character, like \N{LATIN SMALL LETTER A}, or a named
+ * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
+ * GRAVE}. For convenience all three forms are referred to as
+ * "named characters" below.
+ *
+ * For patterns, \N also can mean to match a non-newline. Code
+ * before this 'switch' statement should already have handled
+ * this situation, and hence this code only has to deal with
+ * the named character cases.
+ *
+ * For non-patterns, the named characters are converted to
+ * their string equivalents. In patterns, named characters are
+ * not converted to their ultimate forms for the same reasons
+ * that other escapes aren't. Instead, they are converted to
+ * the \N{U+...} form to get the value from the charnames that
+ * is in effect right now, while preserving the fact that it
+ * was a named character, so that the regex compiler knows
+ * this.
+ *
+ * The structure of this section of code (besides checking for
* errors and upgrading to utf8) is:
- * Further disambiguate between the two meanings of \N, and if
- * not a charname, go process it elsewhere
- * If of form \N{U+...}, pass it through if a pattern;
- * otherwise convert to utf8
- * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
- * pattern; otherwise convert to utf8 */
-
- /* Here, s points to the 'N'; the test below is guaranteed to
- * succeed if we are being called on a pattern as we already
- * know from a test above that the next character is a '{'.
- * On a non-pattern \N must mean 'named sequence, which
- * requires braces */
+ * If the named character is of the form \N{U+...}, pass it
+ * through if a pattern; otherwise convert the code point
+ * to utf8
+ * Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
+ * if a pattern; otherwise convert to utf8
+ *
+ * If the regex compiler should ever need to differentiate
+ * between the \N{U+...} and \N{name} forms, that could easily
+ * be done here by stripping any leading zeros from the
+ * \N{U+...} case, and adding them to the other one. */
+
+ /* Here, 's' points to the 'N'; the test below is guaranteed to
+ * succeed if we are being called on a pattern, as we already
+ * know from a test above that the next character is a '{'. A
+ * non-pattern \N must mean 'named character', which requires
+ * braces */
s++;
if (*s != '{') {
yyerror("Missing braces on \\N{}");
| PERL_SCAN_DISALLOW_PREFIX;
STRLEN len;
- /* For \N{U+...}, the '...' is a unicode value even on
- * EBCDIC machines */
s += 2; /* Skip to next char after the 'U+' */
len = e - s;
uv = grok_hex(s, &len, &flags, NULL);
}
if (PL_lex_inpat) {
-
- /* On non-EBCDIC platforms, pass through to the regex
- * compiler unchanged. The reason we evaluated the
- * number above is to make sure there wasn't a syntax
- * error. But on EBCDIC we convert to native so
- * downstream code can continue to assume it's native
- */
s -= 5; /* Include the '\N{U+' */
#ifdef EBCDIC
- d += my_snprintf(d, e - s + 1 + 1, /* includes the }
+ /* On EBCDIC platforms, in \N{U+...}, the '...' is a
+ * Unicode value, so convert to native so downstream
+ * code can continue to assume it's native */
+ d += my_snprintf(d, e - s + 1 + 1, /* includes the '}'
and the \0 */
- "\\N{U+%X}",
- (unsigned int) UNI_TO_NATIVE(uv));
+ "\\N{U+%X}",
+ (unsigned int) UNI_TO_NATIVE(uv));
#else
- Copy(s, d, e - s + 1, char); /* 1 = include the } */
+ /* On non-EBCDIC platforms, pass it through unchanged.
+ * The reason we evaluated the number above is to make
+ * sure there wasn't a syntax error. */
+ Copy(s, d, e - s + 1, char); /* +1 is for the '}' */
d += e - s + 1;
#endif
}
else { /* Not a pattern: convert the hex to string */
- /* If destination is not in utf8, unconditionally
+ /* If the destination is not in utf8, unconditionally
* recode it to be so. This is because \N{} implies
* Unicode semantics, and scalars have to be in utf8
* to guarantee those semantics */
* through the string. Each character takes up
* 2 hex digits plus either a trailing dot or
* the "}" */
+ const char initial_text[] = "\\N{U+";
+ const STRLEN initial_len = sizeof(initial_text)
+ - 1;
d = off + SvGROW(sv, off
+ 3 * len
- + 6 /* For the "\N{U+", and
- trailing NUL */
+
+ /* +1 for trailing NUL */
+ + initial_len + 1
+
+ (STRLEN)(send - e));
- Copy("\\N{U+", d, 5, char);
- d += 5;
+ Copy(initial_text, d, initial_len, char);
+ d += initial_len;
while (str < str_end) {
char hex_string[4];
- my_snprintf(hex_string, sizeof(hex_string),
- "%02X.", (U8) *str);
+ int len =
+ my_snprintf(hex_string,
+ sizeof(hex_string),
+ "%02X.", (U8) *str);
+ PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string));
Copy(hex_string, d, 3, char);
d += 3;
str++;
}
- d--; /* We will overwrite below the final
+ d--; /* Below, we will overwrite the final
dot with a right brace */
}
else {
const STRLEN off = d - SvPVX_const(sv);
d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
}
- if (! SvUTF8(res)) { /* Make sure is \N{} return is UTF-8 */
- sv_utf8_upgrade(res);
+ if (! SvUTF8(res)) { /* Make sure \N{} return is UTF-8 */
+ sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
str = SvPV_const(res, len);
}
Copy(str, d, len, char);
*d++ = '\t';
break;
case 'e':
- *d++ = ASCII_TO_NATIVE('\033');
+ *d++ = ESC_NATIVE;
break;
case 'a':
*d++ = '\a';
" >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
SvPOK_on(sv);
- if (PL_encoding && !has_utf8) {
- sv_recode_to_utf8(sv, PL_encoding);
+ if (IN_ENCODING && !has_utf8) {
+ sv_recode_to_utf8(sv, _get_encoding());
if (SvUTF8(sv))
has_utf8 = TRUE;
}
STATIC int
S_intuit_more(pTHX_ char *s)
{
- dVAR;
-
PERL_ARGS_ASSERT_INTUIT_MORE;
if (PL_lex_brackets)
*/
STATIC int
-S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
+S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
{
- dVAR;
char *s = start + (*start == '$');
char tmpbuf[sizeof PL_tokenbuf];
STRLEN len;
GV* indirgv;
+ /* Mustn't actually add anything to a symbol table.
+ But also don't want to "initialise" any placeholder
+ constants that might already be there into full
+ blown PVGVs with attached PVCV. */
+ GV * const gv =
+ ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
PERL_ARGS_ASSERT_INTUIT_METHOD;
if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
isUPPER(*PL_tokenbuf))
return 0;
- s = PEEKSPACE(s);
+ s = skipspace(s);
PL_bufptr = start;
PL_expect = XREF;
return *s == '(' ? FUNCMETH : METHOD;
return 0;
/* filehandle or package name makes it a method */
if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
- s = PEEKSPACE(s);
+ s = skipspace(s);
if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
return 0; /* no assumptions -- "=>" quotes bareword */
bare_package:
SV *
Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
{
- dVAR;
if (!funcp)
return NULL;
void
Perl_filter_del(pTHX_ filter_t funcp)
{
- dVAR;
SV *datasv;
PERL_ARGS_ASSERT_FILTER_DEL;
I32
Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
{
- dVAR;
filter_t funcp;
SV *datasv = NULL;
/* This API is bad. It should have been using unsigned int for maxlen.
STATIC char *
S_filter_gets(pTHX_ SV *sv, STRLEN append)
{
- dVAR;
-
PERL_ARGS_ASSERT_FILTER_GETS;
#ifdef PERL_CR_FILTER
STATIC HV *
S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
{
- dVAR;
GV *gv;
PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
if (gv && GvCV(gv)) {
SV * const sv = cv_const_sv(GvCV(gv));
if (sv)
- pkgname = SvPV_const(sv, len);
+ return gv_stashsv(sv, 0);
}
return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
STATIC char *
S_tokenize_use(pTHX_ int is_use, char *s) {
- dVAR;
-
PERL_ARGS_ASSERT_TOKENIZE_USE;
if (PL_expect != XSTATE)
yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
is_use ? "use" : "no"));
PL_expect = XTERM;
- s = SKIPSPACE1(s);
+ s = skipspace(s);
if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
s = force_version(s, TRUE);
if (*s == ';' || *s == '}'
- || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
+ || (s = skipspace(s), (*s == ';' || *s == '}'))) {
NEXTVAL_NEXTTOKE.opval = NULL;
force_next(WORD);
}
#ifdef DEBUGGING
static const char* const exp_name[] =
{ "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
- "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
+ "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
+ "TERMORDORDOR"
};
#endif
pl_yylval = PL_nextval[PL_nexttoke];
if (!PL_nexttoke) {
PL_lex_state = PL_lex_defer;
- PL_expect = PL_lex_expect;
PL_lex_defer = LEX_NORMAL;
}
{
PL_lex_starts = 0;
/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
if (PL_lex_casemods == 1 && PL_lex_inpat)
- OPERATOR(',');
+ TOKEN(',');
else
- Aop(OP_CONCAT);
+ AopNOASSIGN(OP_CONCAT);
}
else
return yylex();
s = PL_bufptr;
/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
if (!PL_lex_casemods && PL_lex_inpat)
- OPERATOR(',');
+ TOKEN(',');
else
- Aop(OP_CONCAT);
+ AopNOASSIGN(OP_CONCAT);
}
return yylex();
if (PL_lex_starts++) {
/* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
if (!PL_lex_casemods && PL_lex_inpat)
- OPERATOR(',');
+ TOKEN(',');
else
- Aop(OP_CONCAT);
+ AopNOASSIGN(OP_CONCAT);
}
else {
PL_bufptr = s;
: Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
if (len > UNRECOGNIZED_PRECEDE_COUNT) {
- d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
+ d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
} else {
d = PL_linestart;
}
* at least, set argv[0] to the basename of the Perl
* interpreter. So, having found "#!", we'll set it right.
*/
- SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
- SVt_PV)); /* $^X */
- assert(SvPOK(x) || SvGMAGICAL(x));
- if (sv_eq(x, CopFILESV(PL_curcop))) {
- sv_setpvn(x, ipath, ipathend - ipath);
- SvSETMAGIC(x);
- }
- else {
- STRLEN blen;
- STRLEN llen;
- const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
- const char * const lstart = SvPV_const(x,llen);
- if (llen < blen) {
- bstart += blen - llen;
- if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
- sv_setpvn(x, ipath, ipathend - ipath);
- SvSETMAGIC(x);
- }
+ SV* copfilesv = CopFILESV(PL_curcop);
+ if (copfilesv) {
+ SV * const x =
+ GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
+ SVt_PV)); /* $^X */
+ assert(SvPOK(x) || SvGMAGICAL(x));
+ if (sv_eq(x, copfilesv)) {
+ sv_setpvn(x, ipath, ipathend - ipath);
+ SvSETMAGIC(x);
+ }
+ else {
+ STRLEN blen;
+ STRLEN llen;
+ const char *bstart = SvPV_const(copfilesv, blen);
+ const char * const lstart = SvPV_const(x, llen);
+ if (llen < blen) {
+ bstart += blen - llen;
+ if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
+ sv_setpvn(x, ipath, ipathend - ipath);
+ SvSETMAGIC(x);
+ }
+ }
}
+ }
+ else {
+ /* Anything to do if no copfilesv? */
}
TAINT_NOT; /* $^X is always tainted, but that's OK */
}
* line contains "Perl" rather than "perl" */
if (!d) {
for (d = ipathend-4; d >= ipath; --d) {
- if ((*d == 'p' || *d == 'P')
+ if (isALPHA_FOLD_EQ(*d, 'p')
&& !ibcmp(d, "perl", 4))
{
break;
!= PL_unicode)
baduni = TRUE;
}
- if (baduni || *d1 == 'M' || *d1 == 'm') {
+ if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
const char * const m = d1;
while (*d1 && !isSPACE(*d1))
d1++;
Perl_croak(aTHX_
"\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
#endif
- case ' ': case '\t': case '\f': case 013:
+ case ' ': case '\t': case '\f': case '\v':
s++;
goto retry;
case '#':
}
else if (*s == '>') {
s++;
- s = SKIPSPACE1(s);
+ s = skipspace(s);
if (FEATURE_POSTDEREF_IS_ENABLED && (
((*s == '$' || *s == '&') && s[1] == '*')
||(*s == '$' && s[1] == '#' && s[2] == '*')
goto just_a_word_zero_gv;
}
s++;
+ {
+ OP *attrs;
+
switch (PL_expect) {
- OP *attrs;
case XOPERATOR:
if (!PL_in_my || PL_lex_state != LEX_NORMAL)
break;
case XATTRTERM:
PL_expect = XTERMBLOCK;
grabattrs:
- s = PEEKSPACE(s);
+ s = skipspace(s);
attrs = NULL;
while (isIDFIRST_lazy_if(s,UTF)) {
I32 tmp;
newSVOP(OP_CONST, 0,
sv));
}
- s = PEEKSPACE(d);
+ s = skipspace(d);
if (*s == ':' && s[1] != ':')
- s = PEEKSPACE(s+1);
+ s = skipspace(s+1);
else if (s == d)
break; /* require real whitespace or :'s */
/* XXX losing whitespace on sequential attributes here */
}
TOKEN(COLONATTR);
}
+ }
if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
s--;
TOKEN(0);
PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
else
PL_expect = XTERM;
- s = SKIPSPACE1(s);
+ s = skipspace(s);
PL_lex_allbrackets++;
TOKEN('(');
case ';':
TOKEN(0);
CLINE;
s++;
- OPERATOR(';');
+ PL_expect = XSTATE;
+ TOKEN(';');
case ')':
if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
TOKEN(0);
s++;
PL_lex_allbrackets--;
- s = SKIPSPACE1(s);
+ s = skipspace(s);
if (*s == '{')
PREBLOCK(')');
TERM(')');
}
}
/* FALLTHROUGH */
+ case XATTRTERM:
+ case XTERMBLOCK:
+ PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
+ PL_lex_allbrackets++;
+ PL_expect = XSTATE;
+ break;
case XATTRBLOCK:
case XBLOCK:
PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
PL_lex_allbrackets++;
PL_expect = XSTATE;
break;
- case XATTRTERM:
- case XTERMBLOCK:
- PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
+ case XBLOCKTERM:
+ PL_lex_brackstack[PL_lex_brackets++] = XTERM;
PL_lex_allbrackets++;
PL_expect = XSTATE;
break;
else
PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
PL_lex_allbrackets++;
- s = SKIPSPACE1(s);
+ s = skipspace(s);
if (*s == '}') {
if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
PL_expect = XTERM;
}
OPERATOR(HASHBRACK);
}
+ if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
+ /* ${...} or @{...} etc., but not print {...}
+ * Skip the disambiguation and treat this as a block.
+ */
+ goto block_expectation;
+ }
/* This hack serves to disambiguate a pair of curlies
* as being a block or an anon hash. Normally, expectation
* determines that, but in cases where we're not in a
if (*s == '\'' || *s == '"' || *s == '`') {
/* common case: get past first string, handling escapes */
for (t++; t < PL_bufend && *t != *s;)
- if (*t++ == '\\' && (*t == '\\' || *t == *s))
+ if (*t++ == '\\')
t++;
t++;
}
|| (*t == '=' && t[1] == '>')))
OPERATOR(HASHBRACK);
if (PL_expect == XREF)
- PL_expect = XTERM;
+ {
+ block_expectation:
+ /* If there is an opening brace or 'sub:', treat it
+ as a term to make ${{...}}{k} and &{sub:attr...}
+ dwim. Otherwise, treat it as a statement, so
+ map {no strict; ...} works.
+ */
+ s = skipspace(s);
+ if (*s == '{') {
+ PL_expect = XTERM;
+ break;
+ }
+ if (strnEQ(s, "sub", 3)) {
+ d = s + 3;
+ d = skipspace(d);
+ if (*d == ':') {
+ PL_expect = XTERM;
+ break;
+ }
+ }
+ PL_expect = XSTATE;
+ }
else {
PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
PL_expect = XSTATE;
break;
}
pl_yylval.ival = CopLINE(PL_curcop);
- if (isSPACE(*s) || *s == '#')
- PL_copline = NOLINE; /* invalidate current command line number */
+ PL_copline = NOLINE; /* invalidate current command line number */
TOKEN(formbrack ? '=' : '{');
case '}':
if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
if (PL_expect != XOPERATOR) {
if (s[1] != '<' && !strchr(s,'>'))
check_uni();
- if (s[1] == '<')
+ if (s[1] == '<' && s[2] != '>')
s = scan_heredoc(s);
else
s = scan_inputsymbol(s);
{
const char tmp = *s;
if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
- s = SKIPSPACE1(s);
+ s = skipspace(s);
if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
&& intuit_more(s)) {
while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
t++;
if (*t++ == ',') {
- PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
+ PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
while (t < PL_bufend && *t != ']')
t++;
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
PREREF('@');
}
if (PL_lex_state == LEX_NORMAL)
- s = SKIPSPACE1(s);
+ s = skipspace(s);
if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
if (*s == '{')
PL_tokenbuf[0] = '%';
TERM('@');
case '/': /* may be division, defined-or, or pattern */
- if (PL_expect == XTERMORDORDOR && s[1] == '/') {
+ if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
if (!PL_lex_allbrackets && PL_lex_fakeeof >=
(s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
TOKEN(0);
s += 2;
AOPERATOR(DORDOR);
}
- /* FALLTHROUGH */
- case '?': /* may either be conditional or pattern */
- if (PL_expect == XOPERATOR) {
- char tmp = *s++;
- if(tmp == '?') {
- if (!PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
- s--;
- TOKEN(0);
- }
- PL_lex_allbrackets++;
- OPERATOR('?');
- }
- else {
- tmp = *s++;
- if(tmp == '/') {
- /* A // operator. */
- if (!PL_lex_allbrackets && PL_lex_fakeeof >=
- (*s == '=' ? LEX_FAKEEOF_ASSIGN :
- LEX_FAKEEOF_LOGIC)) {
- s -= 2;
- TOKEN(0);
- }
- AOPERATOR(DORDOR);
- }
- else {
- s--;
- if (*s == '=' && !PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
- s--;
- TOKEN(0);
- }
- Mop(OP_DIVIDE);
- }
- }
- }
- else {
- /* Disable warning on "study /blah/" */
- if (PL_oldoldbufptr == PL_last_uni
- && (*PL_last_uni != 's' || s - PL_last_uni < 5
- || memNE(PL_last_uni, "study", 5)
- || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
- ))
- check_uni();
- if (*s == '?')
- deprecate("?PATTERN? without explicit operator");
- s = scan_pat(s,OP_MATCH);
- TERM(sublex_start());
- }
+ else if (PL_expect == XOPERATOR) {
+ s++;
+ if (*s == '=' && !PL_lex_allbrackets &&
+ PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+ s--;
+ TOKEN(0);
+ }
+ Mop(OP_DIVIDE);
+ }
+ else {
+ /* Disable warning on "study /blah/" */
+ if (PL_oldoldbufptr == PL_last_uni
+ && (*PL_last_uni != 's' || s - PL_last_uni < 5
+ || memNE(PL_last_uni, "study", 5)
+ || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
+ ))
+ check_uni();
+ s = scan_pat(s,OP_MATCH);
+ TERM(sublex_start());
+ }
+
+ case '?': /* conditional */
+ s++;
+ if (!PL_lex_allbrackets &&
+ PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
+ s--;
+ TOKEN(0);
+ }
+ PL_lex_allbrackets++;
+ OPERATOR('?');
case '.':
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
} else if (result == KEYWORD_PLUGIN_STMT) {
pl_yylval.opval = o;
CLINE;
- PL_expect = XSTATE;
+ if (!PL_nexttoke) PL_expect = XSTATE;
return REPORT(PLUGSTMT);
} else if (result == KEYWORD_PLUGIN_EXPR) {
pl_yylval.opval = o;
CLINE;
- PL_expect = XOPERATOR;
+ if (!PL_nexttoke) PL_expect = XOPERATOR;
return REPORT(PLUGEXPR);
} else {
Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
char tmpbuf[sizeof PL_tokenbuf + 1];
*tmpbuf = '&';
Copy(PL_tokenbuf, tmpbuf+1, len, char);
- off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
+ off = pad_findmy_pvn(tmpbuf, len+1, 0);
if (off != NOT_IN_PAD) {
assert(off); /* we assume this is boolean-true below */
if (PAD_COMPNAME_FLAGS_isOUR(off)) {
just_a_word: {
int pkgname = 0;
const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
- const char penultchar =
- lastchar && PL_bufptr - 2 >= PL_linestart
- ? PL_bufptr[-2]
- : 0;
+ bool safebw;
/* Get the rest if it looks like a package qualifier */
no_op("Bareword",s);
}
- /* Look for a subroutine with this name in current package,
- unless this is a lexical sub, or name is "Foo::",
+ /* See if the name is "Foo::",
in which case Foo is a bareword
(and a package name). */
PL_tokenbuf[len] = '\0';
gv = NULL;
gvp = 0;
+ safebw = TRUE;
}
else {
- if (!lex && !gv) {
- /* Mustn't actually add anything to a symbol table.
- But also don't want to "initialise" any placeholder
- constants that might already be there into full
- blown PVGVs with attached PVCV. */
- gv = gv_fetchpvn_flags(PL_tokenbuf, len,
- GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
- SVt_PVCV);
- }
- len = 0;
+ safebw = FALSE;
}
/* if we saw a global override before, get the right name */
if (!sv)
sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
- len ? len : strlen(PL_tokenbuf));
+ len);
if (gvp) {
SV * const tmp_sv = sv;
sv = newSVpvs("CORE::GLOBAL::");
pl_yylval.opval->op_private = OPpCONST_BARE;
/* And if "Foo::", then that's what it certainly is. */
- if (len)
+ if (safebw)
goto safe_bareword;
if (!off)
{
OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
const_op->op_private = OPpCONST_BARE;
- rv2cv_op = newCVREF(0, const_op);
- cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
+ rv2cv_op =
+ newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
+ cv = lex
+ ? isGV(gv)
+ ? GvCV(gv)
+ : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
+ ? (CV *)SvRV(gv)
+ : ((CV *)gv)
+ : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
}
+ /* Use this var to track whether intuit_method has been
+ called. intuit_method returns 0 or > 255. */
+ tmp = 1;
+
/* See if it's the indirect object for a list operator. */
if (PL_oldoldbufptr &&
bool immediate_paren = *s == '(';
/* (Now we can afford to cross potential line boundary.) */
- s = SKIPSPACE2(s,nextPL_nextwhite);
+ s = skipspace(s);
/* Two barewords in a row may indicate method call. */
if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
- (tmp = intuit_method(s, gv, cv))) {
- op_free(rv2cv_op);
- if (tmp == METHOD && !PL_lex_allbrackets &&
- PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
- PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
- return REPORT(tmp);
+ (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
+ goto method;
}
/* If not a declared subroutine, it's an indirect object. */
if (*s == '=' && s[1] == '>' && !pkgname) {
op_free(rv2cv_op);
CLINE;
- /* This is our own scalar, created a few lines above,
- so this is safe. */
- SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
- sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
- if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
- SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
- SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
+ if (gvp || (lex && !off)) {
+ assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
+ /* This is our own scalar, created a few lines
+ above, so this is safe. */
+ SvREADONLY_off(sv);
+ sv_setpv(sv, PL_tokenbuf);
+ if (UTF && !IN_BYTES
+ && is_utf8_string((U8*)PL_tokenbuf, len))
+ SvUTF8_on(sv);
+ SvREADONLY_on(sv);
+ }
TERM(WORD);
}
}
NEXTVAL_NEXTTOKE.opval =
off ? rv2cv_op : pl_yylval.opval;
- PL_expect = XOPERATOR;
if (off)
op_free(pl_yylval.opval), force_next(PRIVATEREF);
else op_free(rv2cv_op), force_next(WORD);
if (!PL_lex_allbrackets &&
PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
- PREBLOCK(METHOD);
+ PL_expect = XBLOCKTERM;
+ PL_bufptr = s;
+ return REPORT(METHOD);
}
/* If followed by a bareword, see if it looks like indir obj. */
- if (!orig_keyword
+ if (tmp == 1 && !orig_keyword
&& (isIDFIRST_lazy_if(s,UTF) || *s == '$')
- && (tmp = intuit_method(s, gv, cv))) {
+ && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
+ method:
+ if (lex && !off) {
+ assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
+ SvREADONLY_off(sv);
+ sv_setpvn(sv, PL_tokenbuf, len);
+ if (UTF && !IN_BYTES
+ && is_utf8_string((U8*)PL_tokenbuf, len))
+ SvUTF8_on (sv);
+ else SvUTF8_off(sv);
+ }
op_free(rv2cv_op);
if (tmp == METHOD && !PL_lex_allbrackets &&
PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
/* Not a method, so call it a subroutine (if defined) */
if (cv) {
- if (lastchar == '-' && penultchar != '-') {
- const STRLEN l = len ? len : strlen(PL_tokenbuf);
- Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
- UTF8fARG(UTF, l, PL_tokenbuf),
- UTF8fARG(UTF, l, PL_tokenbuf));
- }
/* Check for a constant sub */
if ((sv = cv_const_sv_or_av(cv))) {
its_constant:
if (!IN_BYTES) {
if (UTF)
PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
- else if (PL_encoding) {
+ else if (IN_ENCODING) {
SV *name;
dSP;
ENTER;
SAVETMPS;
PUSHMARK(sp);
- XPUSHs(PL_encoding);
+ XPUSHs(_get_encoding());
PUTBACK;
call_method("name", G_SCALAR);
SPAGAIN;
}
case KEY___SUB__:
- FUN0OP(newPVOP(OP_RUNCV,0,NULL));
+ FUN0OP(CvCLONE(PL_compcv)
+ ? newOP(OP_RUNCV, 0)
+ : newPVOP(OP_RUNCV,0,NULL));
case KEY_AUTOLOAD:
case KEY_DESTROY:
PREBLOCK(DEFAULT);
case KEY_do:
- s = SKIPSPACE1(s);
+ s = skipspace(s);
if (*s == '{')
PRETERMBLOCK(DO);
if (*s != '\'') {
1, &len);
if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
&& !keyword(PL_tokenbuf + 1, len, 0)) {
- d = SKIPSPACE1(d);
+ d = skipspace(d);
if (*d == '(') {
force_ident_maybe_lex('&');
s = d;
UNI(OP_DBMCLOSE);
case KEY_dump:
- PL_expect = XOPERATOR;
- s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_DUMP);
case KEY_else:
UNI(OP_EXIT);
case KEY_eval:
- s = SKIPSPACE1(s);
+ s = skipspace(s);
if (*s == '{') { /* block eval */
PL_expect = XTERMBLOCK;
UNIBRACK(OP_ENTERTRY);
if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
return REPORT(0);
pl_yylval.ival = CopLINE(PL_curcop);
- s = SKIPSPACE1(s);
+ s = skipspace(s);
if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
char *p = s;
else if ((PL_bufend - p) >= 4 &&
strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
p += 3;
- p = PEEKSPACE(p);
+ p = skipspace(p);
/* skip optional package name, as in "for my abc $x (..)" */
if (isIDFIRST_lazy_if(p,UTF)) {
p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
- p = PEEKSPACE(p);
+ p = skipspace(p);
}
if (*p != '$')
Perl_croak(aTHX_ "Missing $ on loop variable");
LOP(OP_GREPSTART, XREF);
case KEY_goto:
- PL_expect = XOPERATOR;
- s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_GOTO);
case KEY_gmtime:
LOP(OP_KILL,XTERM);
case KEY_last:
- PL_expect = XOPERATOR;
- s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_LAST);
case KEY_lc:
case KEY_my:
case KEY_state:
PL_in_my = (U16)tmp;
- s = SKIPSPACE1(s);
+ s = skipspace(s);
if (isIDFIRST_lazy_if(s,UTF)) {
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
if (!PL_in_my_stash) {
char tmpbuf[1024];
+ int len;
PL_bufptr = s;
- my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
+ len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
+ PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
}
}
OPERATOR(MY);
case KEY_next:
- PL_expect = XOPERATOR;
- s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_NEXT);
case KEY_ne:
case KEY_no:
s = tokenize_use(0, s);
- TERM(USE);
+ TOKEN(USE);
case KEY_not:
- if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
+ if (*s == '(' || (s = skipspace(s), *s == '('))
FUN1(OP_NOT);
else {
if (!PL_lex_allbrackets &&
}
case KEY_open:
- s = SKIPSPACE1(s);
+ s = skipspace(s);
if (isIDFIRST_lazy_if(s,UTF)) {
const char *t;
d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
case KEY_package:
s = force_word(s,WORD,FALSE,TRUE);
- s = SKIPSPACE1(s);
+ s = skipspace(s);
s = force_strict_version(s);
- PL_lex_expect = XBLOCK;
- OPERATOR(PACKAGE);
+ PREBLOCK(PACKAGE);
case KEY_pipe:
LOP(OP_PIPE_OP,XTERM);
OLDLOP(OP_RETURN);
case KEY_require:
- s = SKIPSPACE1(s);
- PL_expect = XOPERATOR;
+ s = skipspace(s);
if (isDIGIT(*s)) {
s = force_version(s, FALSE);
}
}
else
pl_yylval.ival = 0;
- PL_expect = XTERM;
+ PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
PL_bufptr = s;
PL_last_uni = PL_oldbufptr;
PL_last_lop_op = OP_REQUIRE;
UNI(OP_RESET);
case KEY_redo:
- PL_expect = XOPERATOR;
- s = force_word(s,WORD,TRUE,FALSE);
LOOPX(OP_REDO);
case KEY_rename:
case KEY_sort:
checkcomma(s,PL_tokenbuf,"subroutine name");
- s = SKIPSPACE1(s);
+ s = skipspace(s);
PL_expect = XTERM;
s = force_word(s,WORD,TRUE,TRUE);
LOP(OP_SORT,XREF);
*PL_tokenbuf = '&';
if (memchr(tmpbuf, ':', len) || key != KEY_sub
|| pad_findmy_pvn(
- PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
+ PL_tokenbuf, len + 1, 0
) != NOT_IN_PAD)
sv_setpvn(PL_subname, tmpbuf, len);
else {
case KEY_use:
s = tokenize_use(1, s);
- OPERATOR(USE);
+ TOKEN(USE);
case KEY_values:
UNI(OP_VALUES);
static int
S_pending_ident(pTHX)
{
- dVAR;
PADOFFSET tmp = 0;
const char pit = (char)pl_yylval.ival;
const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
}
else {
if (has_colon) {
+ /* "my" variable %s can't be in a package */
/* PL_no_myglob is constant */
GCC_DIAG_IGNORE(-Wformat-nonliteral);
yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
- PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
+ PL_in_my == KEY_my ? "my" : "state",
+ *PL_tokenbuf == '&' ? "subroutin" : "variabl",
+ PL_tokenbuf),
UTF ? SVf_UTF8 : 0);
GCC_DIAG_RESTORE;
}
if (!has_colon) {
if (!PL_in_my)
tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
- UTF ? SVf_UTF8 : 0);
+ 0);
if (tmp != NOT_IN_PAD) {
/* might be an "our" variable" */
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
pl_yylval.opval->op_private = OPpCONST_ENTERED;
if (pit != '&')
gv_fetchsv(sym,
- (PL_in_eval
- ? (GV_ADDMULTI | GV_ADDINEVAL)
- : GV_ADDMULTI
- ),
+ GV_ADDMULTI,
((PL_tokenbuf[0] == '$') ? SVt_PV
: (PL_tokenbuf[0] == '@') ? SVt_PVAV
: SVt_PVHV));
pl_yylval.opval->op_private = OPpCONST_ENTERED;
if (pit != '&')
gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
- (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
+ (PL_in_eval ? GV_ADDMULTI : GV_ADD)
| ( UTF ? SVf_UTF8 : 0 ),
((PL_tokenbuf[0] == '$') ? SVt_PV
: (PL_tokenbuf[0] == '@') ? SVt_PVAV
STATIC void
S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
{
- dVAR;
-
PERL_ARGS_ASSERT_CHECKCOMMA;
if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
s++;
if (*s == ',') {
GV* gv;
+ PADOFFSET off;
if (keyword(w, s - w, 0))
return;
gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
if (gv && GvCVu(gv))
return;
+ if (s - w <= 254) {
+ char tmpbuf[256];
+ Copy(w, tmpbuf+1, s - w, char);
+ *tmpbuf = '&';
+ off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
+ if (off != NOT_IN_PAD) return;
+ }
Perl_croak(aTHX_ "No comma allowed after %s", what);
}
}
S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
SV *sv, SV *pv, const char *type, STRLEN typelen)
{
- dVAR; dSP;
+ dSP;
HV * table = GvHV(PL_hintgv); /* ^H */
SV *res;
SV *errsv = NULL;
PERL_STATIC_INLINE void
S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
- dVAR;
PERL_ARGS_ASSERT_PARSE_IDENT;
for (;;) {
STATIC char *
S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
{
- dVAR;
char *d = dest;
char * const e = d + destlen - 3; /* two-character token, ending NUL */
bool is_utf8 = cBOOL(UTF);
STATIC char *
S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
{
- dVAR;
I32 herelines = PL_parser->herelines;
SSize_t bracket = -1;
char funny = *s++;
PERL_ARGS_ASSERT_SCAN_IDENT;
if (isSPACE(*s))
- s = PEEKSPACE(s);
+ s = skipspace(s);
if (isDIGIT(*s)) {
while (isDIGIT(*s)) {
if (d >= e)
s++;
orig_copline = CopLINE(PL_curcop);
if (s < PL_bufend && isSPACE(*s)) {
- s = PEEKSPACE(s);
+ s = skipspace(s);
}
}
/* Is the byte 'd' a legal single character identifier name? 'u' is true
* iff Unicode semantics are to be used. The legal ones are any of:
- * a) ASCII digits
- * b) ASCII punctuation
+ * a) all ASCII characters except:
+ * 1) space-type ones, like \t and SPACE;
+ 2) NUL;
+ * 3) '{'
+ * The final case currently doesn't get this far in the program, so we
+ * don't test for it. If that were to change, it would be ok to allow it.
* c) When not under Unicode rules, any upper Latin1 character
- * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
- * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus
- * the \s ones. */
-#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
- || isDIGIT_A((U8)(d)) \
- || (!(u) && !isASCII((U8)(d))) \
- || ((((U8)(d)) < 32) \
- && (((((U8)(d)) >= 14) \
- || (((U8)(d)) <= 8 && (d) != 0) \
- || (((U8)(d)) == 13)))) \
- || (((U8)(d)) == toCTRL('?')))
- if (s < PL_bufend
- && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
+ * d) Otherwise, when unicode rules are used, all XIDS characters.
+ *
+ * Because all ASCII characters have the same representation whether
+ * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
+ * '{' without knowing if is UTF-8 or not.
+ * EBCDIC already uses the rules that ASCII platforms will use after the
+ * deprecation cycle; see comment below about the deprecation. */
+#ifdef EBCDIC
+# define VALID_LEN_ONE_IDENT(s, is_utf8) \
+ (isGRAPH_A(*(s)) || ((is_utf8) \
+ ? isIDFIRST_utf8((U8*) (s)) \
+ : (isGRAPH_L1(*s) \
+ && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
+#else
+# define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s)) \
+ && LIKELY(*(s) != '\0') \
+ && (! is_utf8 \
+ || isASCII_utf8((U8*) (s)) \
+ || isIDFIRST_utf8((U8*) (s))))
+#endif
+ if ((s <= PL_bufend - (is_utf8)
+ ? UTF8SKIP(s)
+ : 1)
+ && VALID_LEN_ONE_IDENT(s, is_utf8))
{
- if ( isCNTRL_A((U8)*s) ) {
- deprecate("literal control characters in variable names");
+ /* Deprecate all non-graphic characters. Include SHY as a non-graphic,
+ * because often it has no graphic representation. (We can't get to
+ * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
+ * test for it.) */
+ if ((is_utf8)
+ ? ! isGRAPH_utf8( (U8*) s)
+ : (! isGRAPH_L1( (U8) *s)
+ || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
+ {
+ /* Split messages for back compat */
+ if (isCNTRL_A( (U8) *s)) {
+ deprecate("literal control characters in variable names");
+ }
+ else {
+ deprecate("literal non-graphic characters in variable names");
+ }
}
if (is_utf8) {
*d = '\0';
tmp_copline = CopLINE(PL_curcop);
if (s < PL_bufend && isSPACE(*s)) {
- s = PEEKSPACE(s);
+ s = skipspace(s);
}
if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
/* ${foo[0]} and ${foo{bar}} notation. */
if ( !tmp_copline )
tmp_copline = CopLINE(PL_curcop);
if (s < PL_bufend && isSPACE(*s)) {
- s = PEEKSPACE(s);
+ s = skipspace(s);
}
/* Expect to find a closing } after consuming any trailing whitespace.
}
static bool
-S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
-
- /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
- * the parse starting at 's', based on the subset that are valid in this
- * context input to this routine in 'valid_flags'. Advances s. Returns
- * TRUE if the input should be treated as a valid flag, so the next char
- * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
- * first call on the current regex. This routine will set it to any
+S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
+
+ /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
+ * found in the parse starting at 's', based on the subset that are valid
+ * in this context input to this routine in 'valid_flags'. Advances s.
+ * Returns TRUE if the input should be treated as a valid flag, so the next
+ * char may be as well; otherwise FALSE. 'charset' should point to a NUL
+ * upon first call on the current regex. This routine will set it to any
* charset modifier found. The caller shouldn't change it. This way,
* another charset modifier encountered in the parse can be detected as an
* error, as we have decided to allow only one */
switch (c) {
- CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
+ CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
STATIC char *
S_scan_pat(pTHX_ char *start, I32 type)
{
- dVAR;
PMOP *pm;
char *s;
const char * const valid_flags =
(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
char charset = '\0'; /* character set modifier */
+ unsigned int x_mod_count = 0;
PERL_ARGS_ASSERT_SCAN_PAT;
s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
- if (!s) {
- const char * const delimiter = skipspace(start);
- Perl_croak(aTHX_
- (const char *)
- (*delimiter == '?'
- ? "Search pattern not terminated or ternary operator parsed as search pattern"
- : "Search pattern not terminated" ));
- }
+ if (!s)
+ Perl_croak(aTHX_ "Search pattern not terminated");
pm = (PMOP*)newPMOP(type, 0);
if (PL_multi_open == '?') {
pm->op_pmflags |= PMf_IS_QR;
}
- while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
+ while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
+ &s, &charset, &x_mod_count))
+ {};
/* issue a warning if /c is specified,but /g is not */
if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
{
"Use of /c modifier is meaningless without /g" );
}
+ STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+
PL_lex_op = (OP*)pm;
pl_yylval.ival = OP_MATCH;
return s;
STATIC char *
S_scan_subst(pTHX_ char *start)
{
- dVAR;
char *s;
PMOP *pm;
I32 first_start;
line_t first_line;
I32 es = 0;
char charset = '\0'; /* character set modifier */
+ unsigned int x_mod_count = 0;
char *t;
PERL_ARGS_ASSERT_SCAN_SUBST;
s++;
es++;
}
- else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
+ else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
+ &s, &charset, &x_mod_count))
{
break;
}
}
+ STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+
if ((pm->op_pmflags & PMf_CONTINUE)) {
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
}
STATIC char *
S_scan_trans(pTHX_ char *start)
{
- dVAR;
char* s;
OP *o;
U8 squash;
STATIC char *
S_scan_heredoc(pTHX_ char *s)
{
- dVAR;
I32 op_type = OP_SCALAR;
I32 len;
SV *tmpstr;
origline + 1 + PL_parser->herelines);
if (!lex_next_chunk(LEX_NO_TERM)
&& (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
- SvREFCNT_dec(linestr_save);
+ /* Simply freeing linestr_save might seem simpler here, as it
+ does not matter what PL_linestr points to, since we are
+ about to croak; but in a quote-like op, linestr_save
+ will have been prospectively freed already, via
+ SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
+ restore PL_linestr. */
+ SvREFCNT_dec_NN(PL_linestr);
+ PL_linestr = linestr_save;
goto interminable;
}
CopLINE_set(PL_curcop, origline);
else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
PL_bufend[-1] = '\n';
#endif
- if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
+ if (*s == term && PL_bufend-s >= len
+ && memEQ(s,PL_tokenbuf + 1,len)) {
SvREFCNT_dec(PL_linestr);
PL_linestr = linestr_save;
PL_linestart = SvPVX(linestr_save);
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
SvUTF8_on(tmpstr);
- else if (PL_encoding)
- sv_recode_to_utf8(tmpstr, PL_encoding);
+ else if (IN_ENCODING)
+ sv_recode_to_utf8(tmpstr, _get_encoding());
}
PL_lex_stuff = tmpstr;
pl_yylval.ival = op_type;
This code handles:
<> read from ARGV
+ <<>> read from ARGV without magic open
<FH> read from filehandle
<pkg::FH> read from package qualified filehandle
<pkg'FH> read from package qualified filehandle
STATIC char *
S_scan_inputsymbol(pTHX_ char *start)
{
- dVAR;
char *s = start; /* current position in buffer */
char *end;
I32 len;
+ bool nomagicopen = FALSE;
char *d = PL_tokenbuf; /* start of temp holding space */
const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
end = strchr(s, '\n');
if (!end)
end = PL_bufend;
- s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
+ if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
+ nomagicopen = TRUE;
+ *d = '\0';
+ len = 0;
+ s += 3;
+ }
+ else
+ s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
/* die if we didn't have space for the contents of the <>,
or if it didn't end, or if we see a newline
/* try to find it in the pad for this block, otherwise find
add symbol table ops
*/
- const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
+ const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
if (tmp != NOT_IN_PAD) {
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
++d;
intro_sym:
gv = gv_fetchpv(d,
- (PL_in_eval
- ? (GV_ADDMULTI | GV_ADDINEVAL)
- : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
+ GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
SVt_PV);
PL_lex_op = readline_overriden
? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
newUNOP(OP_RV2SV, 0,
newGVOP(OP_GV, 0, gv)));
}
- if (!readline_overriden)
- PL_lex_op->op_flags |= OPf_SPECIAL;
/* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
pl_yylval.ival = OP_NULL;
}
op_append_elem(OP_LIST,
newGVOP(OP_GV, 0, gv),
newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
- : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
+ : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
pl_yylval.ival = OP_NULL;
}
}
char **delimp
)
{
- dVAR;
SV *sv; /* scalar value: string */
const char *tmps; /* temp string, used for delimiter matching */
char *s = start; /* current position in the buffer */
/* skip space before the delimiter */
if (isSPACE(*s)) {
- s = PEEKSPACE(s);
+ s = skipspace(s);
}
/* mark where we are, in case we need to report errors */
sv_catpvn(sv, s, termlen);
s += termlen;
for (;;) {
- if (PL_encoding && !UTF && !re_reparse) {
+ if (IN_ENCODING && !UTF && !re_reparse) {
bool cont = TRUE;
while (cont) {
int offset = s - SvPVX_const(PL_linestr);
- const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
+ const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
&offset, (char*)termstr, termlen);
const char *ns;
char *svlast;
/* at this point, we have successfully read the delimited string */
- if (!PL_encoding || UTF || re_reparse) {
+ if (!IN_ENCODING || UTF || re_reparse) {
if (keep_delims)
sv_catpvn(sv, s, termlen);
s += termlen;
}
- if (has_utf8 || (PL_encoding && !re_reparse))
+ if (has_utf8 || (IN_ENCODING && !re_reparse))
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
\d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
\.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
- 0b[01](_?[01])*
- 0[0-7](_?[0-7])*
- 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
+ 0b[01](_?[01])* binary integers
+ 0[0-7](_?[0-7])* octal integers
+ 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
+ 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
thing it reads.
char *
Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
{
- dVAR;
const char *s = start; /* current position in buffer */
char *d; /* destination in temp buffer */
char *e; /* end of temp buffer */
bool floatit; /* boolean: int or float? */
const char *lastub = NULL; /* position of last underbar */
static const char* const number_too_long = "Number too long";
+ /* Hexadecimal floating point.
+ *
+ * In many places (where we have quads and NV is IEEE 754 double)
+ * we can fit the mantissa bits of a NV into an unsigned quad.
+ * (Note that UVs might not be quads even when we have quads.)
+ * This will not work everywhere, though (either no quads, or
+ * using long doubles), in which case we have to resort to NV,
+ * which will probably mean horrible loss of precision due to
+ * multiple fp operations. */
+ bool hexfp = FALSE;
+ int total_bits = 0;
+#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
+# define HEXFP_UQUAD
+ Uquad_t hexfp_uquad = 0;
+ int hexfp_frac_bits = 0;
+#else
+# define HEXFP_NV
+ NV hexfp_nv = 0.0;
+#endif
+ NV hexfp_mult = 1.0;
+ UV high_non_zero = 0; /* highest digit */
PERL_ARGS_ASSERT_SCAN_NUM;
const char *base, *Base, *max;
/* check for hex */
- if (s[1] == 'x' || s[1] == 'X') {
+ if (isALPHA_FOLD_EQ(s[1], 'x')) {
shift = 4;
s += 2;
just_zero = FALSE;
- } else if (s[1] == 'b' || s[1] == 'B') {
+ } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
shift = 1;
s += 2;
just_zero = FALSE;
}
/* check for a decimal in disguise */
- else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
+ else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
goto decimal;
/* so it must be octal */
else {
if (!overflowed) {
x = u << shift; /* make room for the digit */
+ total_bits += shift;
+
if ((x >> shift) != u
&& !(PL_hints & HINT_NEW_BINARY)) {
overflowed = TRUE;
* amount. */
n += (NV) b;
}
+
+ if (high_non_zero == 0 && b > 0)
+ high_non_zero = b;
+
+ /* this could be hexfp, but peek ahead
+ * to avoid matching ".." */
+ if (UNLIKELY(HEXFP_PEEK(s))) {
+ goto out;
+ }
+
break;
}
}
Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
}
+ if (UNLIKELY(HEXFP_PEEK(s))) {
+ /* Do sloppy (on the underbars) but quick detection
+ * (and value construction) for hexfp, the decimal
+ * detection will shortly be more thorough with the
+ * underbar checks. */
+ const char* h = s;
+#ifdef HEXFP_UQUAD
+ hexfp_uquad = u;
+#else /* HEXFP_NV */
+ hexfp_nv = u;
+#endif
+ if (*h == '.') {
+#ifdef HEXFP_NV
+ NV mult = 1 / 16.0;
+#endif
+ h++;
+ while (isXDIGIT(*h) || *h == '_') {
+ if (isXDIGIT(*h)) {
+ U8 b = XDIGIT_VALUE(*h);
+ total_bits += shift;
+#ifdef HEXFP_UQUAD
+ hexfp_uquad <<= shift;
+ hexfp_uquad |= b;
+ hexfp_frac_bits += shift;
+#else /* HEXFP_NV */
+ hexfp_nv += b * mult;
+ mult /= 16.0;
+#endif
+ }
+ h++;
+ }
+ }
+
+ if (total_bits >= 4) {
+ if (high_non_zero < 0x8)
+ total_bits--;
+ if (high_non_zero < 0x4)
+ total_bits--;
+ if (high_non_zero < 0x2)
+ total_bits--;
+ }
+
+ if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) {
+ bool negexp = FALSE;
+ h++;
+ if (*h == '+')
+ h++;
+ else if (*h == '-') {
+ negexp = TRUE;
+ h++;
+ }
+ if (isDIGIT(*h)) {
+ I32 hexfp_exp = 0;
+ while (isDIGIT(*h) || *h == '_') {
+ if (isDIGIT(*h)) {
+ hexfp_exp *= 10;
+ hexfp_exp += *h - '0';
+#ifdef NV_MIN_EXP
+ if (negexp &&
+ -hexfp_exp < NV_MIN_EXP - 1) {
+ Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
+ "Hexadecimal float: exponent underflow");
+#endif
+ break;
+ }
+ else {
+#ifdef NV_MAX_EXP
+ if (!negexp &&
+ hexfp_exp > NV_MAX_EXP - 1) {
+ Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
+ "Hexadecimal float: exponent overflow");
+ break;
+ }
+#endif
+ }
+ }
+ h++;
+ }
+ if (negexp)
+ hexfp_exp = -hexfp_exp;
+#ifdef HEXFP_UQUAD
+ hexfp_exp -= hexfp_frac_bits;
+#endif
+ hexfp_mult = pow(2.0, hexfp_exp);
+ hexfp = TRUE;
+ goto decimal;
+ }
+ }
+ }
+
if (overflowed) {
if (n > 4294967295.0)
Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
decimal:
d = PL_tokenbuf;
e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
- floatit = FALSE;
+ floatit = FALSE;
+ if (hexfp) {
+ floatit = TRUE;
+ *d++ = '0';
+ *d++ = 'x';
+ s = start + 2;
+ }
/* read next group of digits and _ and copy into d */
- while (isDIGIT(*s) || *s == '_') {
+ while (isDIGIT(*s) || *s == '_' ||
+ UNLIKELY(hexfp && isXDIGIT(*s))) {
/* skip underscores, checking for misplaced ones
if -w is on
*/
/* copy, ignoring underbars, until we run out of digits.
*/
- for (; isDIGIT(*s) || *s == '_'; s++) {
+ for (; isDIGIT(*s) || *s == '_' ||
+ UNLIKELY(hexfp && isXDIGIT(*s));
+ s++) {
/* fixed length buffer check */
if (d >= e)
Perl_croak(aTHX_ "%s", number_too_long);
}
/* read exponent part, if present */
- if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
- floatit = TRUE;
+ if ((isALPHA_FOLD_EQ(*s, 'e')
+ || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
+ && strchr("+-0123456789_", s[1]))
+ {
+ floatit = TRUE;
+
+ /* regardless of whether user said 3E5 or 3e5, use lower 'e',
+ ditto for p (hexfloats) */
+ if ((isALPHA_FOLD_EQ(*s, 'e'))) {
+ /* At least some Mach atof()s don't grok 'E' */
+ *d++ = 'e';
+ }
+ else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
+ *d++ = 'p';
+ }
+
s++;
- /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
- *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
/* stray preinitial _ */
if (*s == '_') {
STORE_NUMERIC_LOCAL_SET_STANDARD();
/* terminate the string */
*d = '\0';
- nv = Atof(PL_tokenbuf);
+ if (UNLIKELY(hexfp)) {
+# ifdef NV_MANT_DIG
+ if (total_bits > NV_MANT_DIG)
+ Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
+ "Hexadecimal float: mantissa overflow");
+# endif
+#ifdef HEXFP_UQUAD
+ nv = hexfp_uquad * hexfp_mult;
+#else /* HEXFP_NV */
+ nv = hexfp_nv * hexfp_mult;
+#endif
+ } else {
+ nv = Atof(PL_tokenbuf);
+ }
RESTORE_NUMERIC_LOCAL();
- sv = newSVnv(nv);
+ sv = newSVnv(nv);
}
if ( floatit
STATIC char *
S_scan_formline(pTHX_ char *s)
{
- dVAR;
char *eol;
char *t;
SV * const stuff = newSVpvs("");
if (needargs) {
const char *s2 = s;
while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
- || *s2 == 013)
+ || *s2 == '\v')
s2++;
if (*s2 == '{') {
PL_expect = XTERMBLOCK;
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
SvUTF8_on(stuff);
- else if (PL_encoding)
- sv_recode_to_utf8(stuff, PL_encoding);
+ else if (IN_ENCODING)
+ sv_recode_to_utf8(stuff, _get_encoding());
}
NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
force_next(THING);
I32
Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
{
- dVAR;
const I32 oldsavestack_ix = PL_savestack_ix;
CV* const outsidecv = PL_compcv;
CvFLAGS(PL_compcv) |= flags;
PL_subline = CopLINE(PL_curcop);
- CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
+ CvPADLIST_set(PL_compcv, pad_new(padnew_SAVE|padnew_SAVESUB));
CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
if (outsidecv && CvPADLIST(outsidecv))
- CvPADLIST(PL_compcv)->xpadl_outid =
- PadlistNAMES(CvPADLIST(outsidecv));
+ CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
return oldsavestack_ix;
}
static int
S_yywarn(pTHX_ const char *const s, U32 flags)
{
- dVAR;
-
PERL_ARGS_ASSERT_YYWARN;
PL_in_eval |= EVAL_WARNONLY;
yyerror_pv(s, flags);
- PL_in_eval &= ~EVAL_WARNONLY;
return 0;
}
int
Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
{
- dVAR;
const char *context = NULL;
int contlen = -1;
SV *msg;
}
else if (yychar > 255)
sv_catpvs(where_sv, "next token ???");
- else if (yychar == -2) { /* YYEMPTY */
+ else if (yychar == YYEMPTY) {
if (PL_lex_state == LEX_NORMAL ||
(PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
sv_catpvs(where_sv, "at end of line");
PL_multi_end = 0;
}
if (PL_in_eval & EVAL_WARNONLY) {
+ PL_in_eval &= ~EVAL_WARNONLY;
Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
}
else
STATIC char*
S_swallow_bom(pTHX_ U8 *s)
{
- dVAR;
const STRLEN slen = SvCUR(PL_linestr);
PERL_ARGS_ASSERT_SWALLOW_BOM;
static I32
S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
{
- dVAR;
SV *const filter = FILTER_DATA(idx);
/* We re-use this each time round, throwing the contents away before we
return. */
char *
Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
{
- dVAR;
const char *pos = s;
const char *start = s;
PL_bufptr = s;
if (d == PL_tokenbuf+1)
return NULL;
- *d = 0;
var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
OPf_MOD | (OPpLVAL_INTRO<<8));
var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
scalar(newUNOP(OP_RV2AV, 0,
newGVOP(OP_GV, 0, PL_defgv))),
newSVOP(OP_CONST, 0, newSViv(1))),
- newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
- newSVOP(OP_CONST, 0,
- newSVpvs("Odd name/value argument "
- "for subroutine"))));
+ op_convert_list(OP_DIE, 0,
+ op_convert_list(OP_SPRINTF, 0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0,
+ newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
+ newSLICEOP(0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0, newSViv(1)),
+ newSVOP(OP_CONST, 0, newSViv(2))),
+ newOP(OP_CALLER, 0))))));
if (pos != min_arity)
chkop = newLOGOP(OP_AND, 0,
newBINOP(OP_GT, 0,
scalar(newUNOP(OP_RV2AV, 0,
newGVOP(OP_GV, 0, PL_defgv))),
newSVOP(OP_CONST, 0, newSViv(min_arity))),
- newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
- newSVOP(OP_CONST, 0,
- newSVpvs("Too few arguments for subroutine"))))),
+ op_convert_list(OP_DIE, 0,
+ op_convert_list(OP_SPRINTF, 0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0,
+ newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
+ newSLICEOP(0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0, newSViv(1)),
+ newSVOP(OP_CONST, 0, newSViv(2))),
+ newOP(OP_CALLER, 0))))))),
initops);
}
if (max_arity != -1) {
scalar(newUNOP(OP_RV2AV, 0,
newGVOP(OP_GV, 0, PL_defgv))),
newSVOP(OP_CONST, 0, newSViv(max_arity))),
- newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
- newSVOP(OP_CONST, 0,
- newSVpvs("Too many arguments for subroutine"))))),
+ op_convert_list(OP_DIE, 0,
+ op_convert_list(OP_SPRINTF, 0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0,
+ newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
+ newSLICEOP(0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0, newSViv(1)),
+ newSVOP(OP_CONST, 0, newSViv(2))),
+ newOP(OP_CALLER, 0))))))),
initops);
}
return initops;