#define XFAKEBRACK 0x80
#ifdef USE_UTF8_SCRIPTS
-# define UTF (!IN_BYTES)
+# define UTF cBOOL(!IN_BYTES)
#else
-# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
+# define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
#endif
/* The maximum number of characters preceding the unrecognized one to display */
#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
-#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
+#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
-#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
-#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
-#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
-#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
+#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
+#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
+#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
+#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
-#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
+#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
#define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
-#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
+#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
}
if (name)
Perl_sv_catpv(aTHX_ report, name);
- else if ((char)rv > ' ' && (char)rv <= '~')
+ else if (isGRAPH(rv))
{
Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
if ((char)rv == 'p')
pl_yylval.ival = OP_DORASSIGN;
toketype = ASSIGNOP;
}
- return toketype;
+ return REPORT(toketype);
}
/*
parser->bufend = parser->bufptr + SvCUR(parser->linestr);
parser->last_lop = parser->last_uni = NULL;
- assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
+ STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
|LEX_DONT_CLOSE_RSFP));
parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
|LEX_DONT_CLOSE_RSFP));
SV * const sv = newSVpvn_utf8(start, len,
!IN_BYTES
&& UTF
- && !is_ascii_string((const U8*)start, len)
+ && !is_invariant_string((const U8*)start, len)
&& is_utf8_string((const U8*)start, len));
return sv;
}
* a keyword (do this if the word is a label, e.g. goto FOO)
* int allow_pack : if true, : characters will also be allowed (require,
* use, etc. do this)
- * int allow_initial_tick : used by the "sub" lexer only.
*/
STATIC char *
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
if (check_keyword) {
char *s2 = PL_tokenbuf;
+ STRLEN len2 = len;
if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
- s2 += 6, len -= 6;
- if (keyword(s2, len, 0))
+ s2 += 6, len2 -= 6;
+ if (keyword(s2, len2, 0))
return start;
}
if (token == METHOD) {
* symbol meaning, e.g. \x{2E} would be confused with a dot. But
* in spite of this, we do have to process \N here while the proper
* charnames handler is in scope. See bugs #56444 and #62056.
+ *
* There is a complication because \N in a pattern may also stand
* for 'match a non-nl', and not mean a charname, in which case its
* processing should be deferred to the regex compiler. To be a
SvPOK_on(sv);
*d = '\0';
/* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- UNISKIP(uv) + (STRLEN)(send - s) + 1);
+ sv_utf8_upgrade_flags_grow(
+ sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
+ /* Above-latin1 in string
+ * implies no encoding */
+ |SV_UTF8_NO_ENCODING,
+ UNISKIP(uv) + (STRLEN)(send - s) + 1);
d = SvPVX(sv) + SvCUR(sv);
has_utf8 = TRUE;
}
continue;
case 'N':
- /* In a non-pattern \N must be a named character, like \N{LATIN
- * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
- * mean to match a non-newline. For non-patterns, named
- * characters are converted to their string equivalents. In
- * patterns, named characters are not converted to their
- * ultimate forms for the same reasons that other escapes
- * aren't. Instead, they are converted to the \N{U+...} form
- * to get the value from the charnames that is in effect right
- * now, while preserving the fact that it was a named character
- * so that the regex compiler knows this */
-
- /* The structure of this section of code (besides checking for
+ /* In a non-pattern \N must be like \N{U+0041}, or it can be a
+ * named character, like \N{LATIN SMALL LETTER A}, or a named
+ * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
+ * GRAVE}. For convenience all three forms are referred to as
+ * "named characters" below.
+ *
+ * For patterns, \N also can mean to match a non-newline. Code
+ * before this 'switch' statement should already have handled
+ * this situation, and hence this code only has to deal with
+ * the named character cases.
+ *
+ * For non-patterns, the named characters are converted to
+ * their string equivalents. In patterns, named characters are
+ * not converted to their ultimate forms for the same reasons
+ * that other escapes aren't. Instead, they are converted to
+ * the \N{U+...} form to get the value from the charnames that
+ * is in effect right now, while preserving the fact that it
+ * was a named character, so that the regex compiler knows
+ * this.
+ *
+ * The structure of this section of code (besides checking for
* errors and upgrading to utf8) is:
- * Further disambiguate between the two meanings of \N, and if
- * not a charname, go process it elsewhere
- * If of form \N{U+...}, pass it through if a pattern;
- * otherwise convert to utf8
- * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
- * pattern; otherwise convert to utf8 */
-
- /* Here, s points to the 'N'; the test below is guaranteed to
- * succeed if we are being called on a pattern as we already
- * know from a test above that the next character is a '{'.
- * On a non-pattern \N must mean 'named sequence, which
- * requires braces */
+ * If the named character is of the form \N{U+...}, pass it
+ * through if a pattern; otherwise convert the code point
+ * to utf8
+ * Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
+ * if a pattern; otherwise convert to utf8
+ *
+ * If the regex compiler should ever need to differentiate
+ * between the \N{U+...} and \N{name} forms, that could easily
+ * be done here by stripping any leading zeros from the
+ * \N{U+...} case, and adding them to the other one. */
+
+ /* Here, 's' points to the 'N'; the test below is guaranteed to
+ * succeed if we are being called on a pattern, as we already
+ * know from a test above that the next character is a '{'. A
+ * non-pattern \N must mean 'named character', which requires
+ * braces */
s++;
if (*s != '{') {
yyerror("Missing braces on \\N{}");
| PERL_SCAN_DISALLOW_PREFIX;
STRLEN len;
- /* For \N{U+...}, the '...' is a unicode value even on
- * EBCDIC machines */
s += 2; /* Skip to next char after the 'U+' */
len = e - s;
uv = grok_hex(s, &len, &flags, NULL);
}
if (PL_lex_inpat) {
-
- /* On non-EBCDIC platforms, pass through to the regex
- * compiler unchanged. The reason we evaluated the
- * number above is to make sure there wasn't a syntax
- * error. But on EBCDIC we convert to native so
- * downstream code can continue to assume it's native
- */
s -= 5; /* Include the '\N{U+' */
#ifdef EBCDIC
- d += my_snprintf(d, e - s + 1 + 1, /* includes the }
+ /* On EBCDIC platforms, in \N{U+...}, the '...' is a
+ * Unicode value, so convert to native so downstream
+ * code can continue to assume it's native */
+ d += my_snprintf(d, e - s + 1 + 1, /* includes the '}'
and the \0 */
- "\\N{U+%X}",
- (unsigned int) UNI_TO_NATIVE(uv));
+ "\\N{U+%X}",
+ (unsigned int) UNI_TO_NATIVE(uv));
#else
- Copy(s, d, e - s + 1, char); /* 1 = include the } */
+ /* On non-EBCDIC platforms, pass it through unchanged.
+ * The reason we evaluated the number above is to make
+ * sure there wasn't a syntax error. */
+ Copy(s, d, e - s + 1, char); /* +1 is for the '}' */
d += e - s + 1;
#endif
}
else { /* Not a pattern: convert the hex to string */
- /* If destination is not in utf8, unconditionally
+ /* If the destination is not in utf8, unconditionally
* recode it to be so. This is because \N{} implies
* Unicode semantics, and scalars have to be in utf8
* to guarantee those semantics */
* through the string. Each character takes up
* 2 hex digits plus either a trailing dot or
* the "}" */
+ const char initial_text[] = "\\N{U+";
+ const STRLEN initial_len = sizeof(initial_text)
+ - 1;
d = off + SvGROW(sv, off
+ 3 * len
- + 6 /* For the "\N{U+", and
- trailing NUL */
+
+ /* +1 for trailing NUL */
+ + initial_len + 1
+
+ (STRLEN)(send - e));
- Copy("\\N{U+", d, 5, char);
- d += 5;
+ Copy(initial_text, d, initial_len, char);
+ d += initial_len;
while (str < str_end) {
char hex_string[4];
int len =
d += 3;
str++;
}
- d--; /* We will overwrite below the final
+ d--; /* Below, we will overwrite the final
dot with a right brace */
}
else {
const STRLEN off = d - SvPVX_const(sv);
d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
}
- if (! SvUTF8(res)) { /* Make sure is \N{} return is UTF-8 */
- sv_utf8_upgrade(res);
+ if (! SvUTF8(res)) { /* Make sure \N{} return is UTF-8 */
+ sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
str = SvPV_const(res, len);
}
Copy(str, d, len, char);
" >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
SvPOK_on(sv);
- if (PL_encoding && !has_utf8) {
- sv_recode_to_utf8(sv, PL_encoding);
+ if (IN_ENCODING && !has_utf8) {
+ sv_recode_to_utf8(sv, _get_encoding());
if (SvUTF8(sv))
has_utf8 = TRUE;
}
Perl_croak(aTHX_
"\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
#endif
- case ' ': case '\t': case '\f': case 013:
+ case ' ': case '\t': case '\f': case '\v':
s++;
goto retry;
case '#':
OPERATOR(HASHBRACK);
}
if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
- /* ${...} or @{...} etc., but not print {...} */
- PL_expect = XTERM;
- break;
+ /* ${...} or @{...} etc., but not print {...}
+ * Skip the disambiguation and treat this as a block.
+ */
+ goto block_expectation;
}
/* This hack serves to disambiguate a pair of curlies
* as being a block or an anon hash. Normally, expectation
|| (*t == '=' && t[1] == '>')))
OPERATOR(HASHBRACK);
if (PL_expect == XREF)
- PL_expect = XTERM;
+ {
+ block_expectation:
+ /* If there is an opening brace or 'sub:', treat it
+ as a term to make ${{...}}{k} and &{sub:attr...}
+ dwim. Otherwise, treat it as a statement, so
+ map {no strict; ...} works.
+ */
+ s = skipspace(s);
+ if (*s == '{') {
+ PL_expect = XTERM;
+ break;
+ }
+ if (strnEQ(s, "sub", 3)) {
+ d = s + 3;
+ d = skipspace(d);
+ if (*d == ':') {
+ PL_expect = XTERM;
+ break;
+ }
+ }
+ PL_expect = XSTATE;
+ }
else {
PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
PL_expect = XSTATE;
break;
}
pl_yylval.ival = CopLINE(PL_curcop);
- if (isSPACE(*s) || *s == '#')
- PL_copline = NOLINE; /* invalidate current command line number */
+ PL_copline = NOLINE; /* invalidate current command line number */
TOKEN(formbrack ? '=' : '{');
case '}':
if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
if (PL_expect != XOPERATOR) {
if (s[1] != '<' && !strchr(s,'>'))
check_uni();
- if (s[1] == '<')
+ if (s[1] == '<' && s[2] != '>')
s = scan_heredoc(s);
else
s = scan_inputsymbol(s);
}
/* avoid v123abc() or $h{v1}, allow C<print v10;> */
if (!isALPHA(*start) && (PL_expect == XTERM
- || PL_expect == XSTATE
+ || PL_expect == XREF || PL_expect == XSTATE
|| PL_expect == XTERMORDORDOR)) {
GV *const gv = gv_fetchpvn_flags(s, start - s,
UTF ? SVf_UTF8 : 0, SVt_PVCV);
char tmpbuf[sizeof PL_tokenbuf + 1];
*tmpbuf = '&';
Copy(PL_tokenbuf, tmpbuf+1, len, char);
- off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
+ off = pad_findmy_pvn(tmpbuf, len+1, 0);
if (off != NOT_IN_PAD) {
assert(off); /* we assume this is boolean-true below */
if (PAD_COMPNAME_FLAGS_isOUR(off)) {
rv2cv_op =
newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
cv = lex
- ? isGV(gv) ? GvCV(gv) : (CV *)gv
+ ? isGV(gv)
+ ? GvCV(gv)
+ : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
+ ? (CV *)SvRV(gv)
+ : ((CV *)gv)
: rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
}
if (!IN_BYTES) {
if (UTF)
PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
- else if (PL_encoding) {
+ else if (IN_ENCODING) {
SV *name;
dSP;
ENTER;
SAVETMPS;
PUSHMARK(sp);
- XPUSHs(PL_encoding);
+ XPUSHs(_get_encoding());
PUTBACK;
call_method("name", G_SCALAR);
SPAGAIN;
}
case KEY___SUB__:
- FUN0OP(newPVOP(OP_RUNCV,0,NULL));
+ FUN0OP(CvCLONE(PL_compcv)
+ ? newOP(OP_RUNCV, 0)
+ : newPVOP(OP_RUNCV,0,NULL));
case KEY_AUTOLOAD:
case KEY_DESTROY:
*PL_tokenbuf = '&';
if (memchr(tmpbuf, ':', len) || key != KEY_sub
|| pad_findmy_pvn(
- PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
+ PL_tokenbuf, len + 1, 0
) != NOT_IN_PAD)
sv_setpvn(PL_subname, tmpbuf, len);
else {
}
else {
if (has_colon) {
+ /* "my" variable %s can't be in a package */
/* PL_no_myglob is constant */
GCC_DIAG_IGNORE(-Wformat-nonliteral);
yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
- PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
+ PL_in_my == KEY_my ? "my" : "state",
+ *PL_tokenbuf == '&' ? "subroutin" : "variabl",
+ PL_tokenbuf),
UTF ? SVf_UTF8 : 0);
GCC_DIAG_RESTORE;
}
if (!has_colon) {
if (!PL_in_my)
tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
- UTF ? SVf_UTF8 : 0);
+ 0);
if (tmp != NOT_IN_PAD) {
/* might be an "our" variable" */
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
s++;
if (*s == ',') {
GV* gv;
+ PADOFFSET off;
if (keyword(w, s - w, 0))
return;
gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
if (gv && GvCVu(gv))
return;
+ if (s - w <= 254) {
+ char tmpbuf[256];
+ Copy(w, tmpbuf+1, s - w, char);
+ *tmpbuf = '&';
+ off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
+ if (off != NOT_IN_PAD) return;
+ }
Perl_croak(aTHX_ "No comma allowed after %s", what);
}
}
/* Is the byte 'd' a legal single character identifier name? 'u' is true
* iff Unicode semantics are to be used. The legal ones are any of:
- * a) ASCII digits
- * b) ASCII punctuation
+ * a) all ASCII characters except:
+ * 1) space-type ones, like \t and SPACE;
+ 2) NUL;
+ * 3) '{'
+ * The final case currently doesn't get this far in the program, so we
+ * don't test for it. If that were to change, it would be ok to allow it.
* c) When not under Unicode rules, any upper Latin1 character
- * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
- * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus
- * the \s ones. */
-#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
- || isDIGIT_A((U8)(d)) \
- || (!(u) && !isASCII((U8)(d))) \
- || ((((U8)(d)) < 32) \
- && (((((U8)(d)) >= 14) \
- || (((U8)(d)) <= 8 && (d) != 0) \
- || (((U8)(d)) == 13)))) \
- || (((U8)(d)) == toCTRL('?')))
- if (s < PL_bufend
- && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
+ * d) Otherwise, when unicode rules are used, all XIDS characters.
+ *
+ * Because all ASCII characters have the same representation whether
+ * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
+ * '{' without knowing if is UTF-8 or not.
+ * EBCDIC already uses the rules that ASCII platforms will use after the
+ * deprecation cycle; see comment below about the deprecation. */
+#ifdef EBCDIC
+# define VALID_LEN_ONE_IDENT(s, is_utf8) \
+ (isGRAPH_A(*(s)) || ((is_utf8) \
+ ? isIDFIRST_utf8((U8*) (s)) \
+ : (isGRAPH_L1(*s) \
+ && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
+#else
+# define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s)) \
+ && LIKELY(*(s) != '\0') \
+ && (! is_utf8 \
+ || isASCII_utf8((U8*) (s)) \
+ || isIDFIRST_utf8((U8*) (s))))
+#endif
+ if ((s <= PL_bufend - (is_utf8)
+ ? UTF8SKIP(s)
+ : 1)
+ && VALID_LEN_ONE_IDENT(s, is_utf8))
{
- if ( isCNTRL_A((U8)*s) ) {
- deprecate("literal control characters in variable names");
+ /* Deprecate all non-graphic characters. Include SHY as a non-graphic,
+ * because often it has no graphic representation. (We can't get to
+ * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
+ * test for it.) */
+ if ((is_utf8)
+ ? ! isGRAPH_utf8( (U8*) s)
+ : (! isGRAPH_L1( (U8) *s)
+ || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
+ {
+ /* Split messages for back compat */
+ if (isCNTRL_A( (U8) *s)) {
+ deprecate("literal control characters in variable names");
+ }
+ else {
+ deprecate("literal non-graphic characters in variable names");
+ }
}
if (is_utf8) {
}
static bool
-S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
-
- /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
- * the parse starting at 's', based on the subset that are valid in this
- * context input to this routine in 'valid_flags'. Advances s. Returns
- * TRUE if the input should be treated as a valid flag, so the next char
- * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
- * first call on the current regex. This routine will set it to any
+S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
+
+ /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
+ * found in the parse starting at 's', based on the subset that are valid
+ * in this context input to this routine in 'valid_flags'. Advances s.
+ * Returns TRUE if the input should be treated as a valid flag, so the next
+ * char may be as well; otherwise FALSE. 'charset' should point to a NUL
+ * upon first call on the current regex. This routine will set it to any
* charset modifier found. The caller shouldn't change it. This way,
* another charset modifier encountered in the parse can be detected as an
* error, as we have decided to allow only one */
switch (c) {
- CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
+ CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
const char * const valid_flags =
(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
char charset = '\0'; /* character set modifier */
+ unsigned int x_mod_count = 0;
PERL_ARGS_ASSERT_SCAN_PAT;
pm->op_pmflags |= PMf_IS_QR;
}
- while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
+ while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
+ &s, &charset, &x_mod_count))
+ {};
/* issue a warning if /c is specified,but /g is not */
if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
{
"Use of /c modifier is meaningless without /g" );
}
+ STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+
PL_lex_op = (OP*)pm;
pl_yylval.ival = OP_MATCH;
return s;
line_t first_line;
I32 es = 0;
char charset = '\0'; /* character set modifier */
+ unsigned int x_mod_count = 0;
char *t;
PERL_ARGS_ASSERT_SCAN_SUBST;
s++;
es++;
}
- else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
+ else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
+ &s, &charset, &x_mod_count))
{
break;
}
}
+ STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+
if ((pm->op_pmflags & PMf_CONTINUE)) {
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
}
origline + 1 + PL_parser->herelines);
if (!lex_next_chunk(LEX_NO_TERM)
&& (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
- SvREFCNT_dec(linestr_save);
+ /* Simply freeing linestr_save might seem simpler here, as it
+ does not matter what PL_linestr points to, since we are
+ about to croak; but in a quote-like op, linestr_save
+ will have been prospectively freed already, via
+ SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
+ restore PL_linestr. */
+ SvREFCNT_dec_NN(PL_linestr);
+ PL_linestr = linestr_save;
goto interminable;
}
CopLINE_set(PL_curcop, origline);
else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
PL_bufend[-1] = '\n';
#endif
- if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
+ if (*s == term && PL_bufend-s >= len
+ && memEQ(s,PL_tokenbuf + 1,len)) {
SvREFCNT_dec(PL_linestr);
PL_linestr = linestr_save;
PL_linestart = SvPVX(linestr_save);
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
SvUTF8_on(tmpstr);
- else if (PL_encoding)
- sv_recode_to_utf8(tmpstr, PL_encoding);
+ else if (IN_ENCODING)
+ sv_recode_to_utf8(tmpstr, _get_encoding());
}
PL_lex_stuff = tmpstr;
pl_yylval.ival = op_type;
This code handles:
<> read from ARGV
+ <<>> read from ARGV without magic open
<FH> read from filehandle
<pkg::FH> read from package qualified filehandle
<pkg'FH> read from package qualified filehandle
char *s = start; /* current position in buffer */
char *end;
I32 len;
+ bool nomagicopen = FALSE;
char *d = PL_tokenbuf; /* start of temp holding space */
const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
end = strchr(s, '\n');
if (!end)
end = PL_bufend;
- s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
+ if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
+ nomagicopen = TRUE;
+ *d = '\0';
+ len = 0;
+ s += 3;
+ }
+ else
+ s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
/* die if we didn't have space for the contents of the <>,
or if it didn't end, or if we see a newline
/* try to find it in the pad for this block, otherwise find
add symbol table ops
*/
- const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
+ const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
if (tmp != NOT_IN_PAD) {
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
op_append_elem(OP_LIST,
newGVOP(OP_GV, 0, gv),
newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
- : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
+ : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
pl_yylval.ival = OP_NULL;
}
}
sv_catpvn(sv, s, termlen);
s += termlen;
for (;;) {
- if (PL_encoding && !UTF && !re_reparse) {
+ if (IN_ENCODING && !UTF && !re_reparse) {
bool cont = TRUE;
while (cont) {
int offset = s - SvPVX_const(PL_linestr);
- const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
+ const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
&offset, (char*)termstr, termlen);
const char *ns;
char *svlast;
/* at this point, we have successfully read the delimited string */
- if (!PL_encoding || UTF || re_reparse) {
+ if (!IN_ENCODING || UTF || re_reparse) {
if (keep_delims)
sv_catpvn(sv, s, termlen);
s += termlen;
}
- if (has_utf8 || (PL_encoding && !re_reparse))
+ if (has_utf8 || (IN_ENCODING && !re_reparse))
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
if (needargs) {
const char *s2 = s;
while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
- || *s2 == 013)
+ || *s2 == '\v')
s2++;
if (*s2 == '{') {
PL_expect = XTERMBLOCK;
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
SvUTF8_on(stuff);
- else if (PL_encoding)
- sv_recode_to_utf8(stuff, PL_encoding);
+ else if (IN_ENCODING)
+ sv_recode_to_utf8(stuff, _get_encoding());
}
NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
force_next(THING);
CvFLAGS(PL_compcv) |= flags;
PL_subline = CopLINE(PL_curcop);
- CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
+ CvPADLIST_set(PL_compcv, pad_new(padnew_SAVE|padnew_SAVESUB));
CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
if (outsidecv && CvPADLIST(outsidecv))
- CvPADLIST(PL_compcv)->xpadl_outid =
- PadlistNAMES(CvPADLIST(outsidecv));
+ CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
return oldsavestack_ix;
}
PL_in_eval |= EVAL_WARNONLY;
yyerror_pv(s, flags);
- PL_in_eval &= ~EVAL_WARNONLY;
return 0;
}
}
else if (yychar > 255)
sv_catpvs(where_sv, "next token ???");
- else if (yychar == -2) { /* YYEMPTY */
+ else if (yychar == YYEMPTY) {
if (PL_lex_state == LEX_NORMAL ||
(PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
sv_catpvs(where_sv, "at end of line");
PL_multi_end = 0;
}
if (PL_in_eval & EVAL_WARNONLY) {
+ PL_in_eval &= ~EVAL_WARNONLY;
Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
}
else
PL_bufptr = s;
if (d == PL_tokenbuf+1)
return NULL;
- *d = 0;
var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
OPf_MOD | (OPpLVAL_INTRO<<8));
var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
scalar(newUNOP(OP_RV2AV, 0,
newGVOP(OP_GV, 0, PL_defgv))),
newSVOP(OP_CONST, 0, newSViv(1))),
- newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
- newSVOP(OP_CONST, 0,
- newSVpvs("Odd name/value argument "
- "for subroutine"))));
+ op_convert_list(OP_DIE, 0,
+ op_convert_list(OP_SPRINTF, 0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0,
+ newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
+ newSLICEOP(0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0, newSViv(1)),
+ newSVOP(OP_CONST, 0, newSViv(2))),
+ newOP(OP_CALLER, 0))))));
if (pos != min_arity)
chkop = newLOGOP(OP_AND, 0,
newBINOP(OP_GT, 0,
scalar(newUNOP(OP_RV2AV, 0,
newGVOP(OP_GV, 0, PL_defgv))),
newSVOP(OP_CONST, 0, newSViv(min_arity))),
- newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
- newSVOP(OP_CONST, 0,
- newSVpvs("Too few arguments for subroutine"))))),
+ op_convert_list(OP_DIE, 0,
+ op_convert_list(OP_SPRINTF, 0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0,
+ newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
+ newSLICEOP(0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0, newSViv(1)),
+ newSVOP(OP_CONST, 0, newSViv(2))),
+ newOP(OP_CALLER, 0))))))),
initops);
}
if (max_arity != -1) {
scalar(newUNOP(OP_RV2AV, 0,
newGVOP(OP_GV, 0, PL_defgv))),
newSVOP(OP_CONST, 0, newSViv(max_arity))),
- newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
- newSVOP(OP_CONST, 0,
- newSVpvs("Too many arguments for subroutine"))))),
+ op_convert_list(OP_DIE, 0,
+ op_convert_list(OP_SPRINTF, 0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0,
+ newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
+ newSLICEOP(0,
+ op_append_list(OP_LIST,
+ newSVOP(OP_CONST, 0, newSViv(1)),
+ newSVOP(OP_CONST, 0, newSViv(2))),
+ newOP(OP_CALLER, 0))))))),
initops);
}
return initops;