/* LEX_* are values for PL_lex_state, the state of the lexer.
* They are arranged oddly so that the guard on the switch statement
* can get by with a single comparison (if the compiler is smart enough).
+ *
+ * These values refer to the various states within a sublex parse,
+ * i.e. within a double quotish string
*/
/* #define LEX_NOTPARSING 11 is done in perl.h. */
* The UNIDOR macro is for unary functions that can be followed by the //
* operator (such as C<shift // 0>).
*/
-#define UNI2(f,x) { \
+#define UNI3(f,x,have_x) { \
pl_yylval.ival = f; \
- PL_expect = x; \
+ if (have_x) PL_expect = x; \
PL_bufptr = s; \
PL_last_uni = PL_oldbufptr; \
PL_last_lop_op = f; \
s = PEEKSPACE(s); \
return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
}
-#define UNI(f) UNI2(f,XTERM)
-#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
+#define UNI(f) UNI3(f,XTERM,1)
+#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
#define UNIPROTO(f,optional) { \
if (optional) PL_last_uni = PL_oldbufptr; \
OPERATOR(f); \
}
-#define UNIBRACK(f) { \
- pl_yylval.ival = f; \
- PL_bufptr = s; \
- PL_last_uni = PL_oldbufptr; \
- if (*s == '(') \
- return REPORT( (int)FUNC1 ); \
- s = PEEKSPACE(s); \
- return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
- }
+#define UNIBRACK(f) UNI3(f,0,0)
/* grandfather return to old style */
#define OLDLOP(f) \
SV *linestr;
char *buf;
STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
- STRLEN linestart_pos, last_uni_pos, last_lop_pos;
+ STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
linestr = PL_parser->linestr;
buf = SvPVX(linestr);
if (len <= SvLEN(linestr))
linestart_pos = PL_parser->linestart - buf;
last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
+ re_eval_start_pos = PL_sublex_info.re_eval_start ?
+ PL_sublex_info.re_eval_start - buf : 0;
+
buf = sv_grow(linestr, len);
+
PL_parser->bufend = buf + bufend_pos;
PL_parser->bufptr = buf + bufptr_pos;
PL_parser->oldbufptr = buf + oldbufptr_pos;
PL_parser->last_uni = buf + last_uni_pos;
if (PL_parser->last_lop)
PL_parser->last_lop = buf + last_lop_pos;
+ if (PL_sublex_info.re_eval_start)
+ PL_sublex_info.re_eval_start = buf + re_eval_start_pos;
return buf;
}
* converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
* interact with PL_lex_state, and create fake ( ... ) argument lists
* to handle functions and concatenation.
- * They assume that whoever calls them will be setting up a fake
- * join call, because each subthing puts a ',' after it. This lets
- * "lower \luPpEr"
- * become
- * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
- *
- * (I'm not sure whether the spurious commas at the end of lcfirst's
- * arguments and join's arguments are created or not).
+ * For example,
+ * "foo\lbar"
+ * is tokenised as
+ * stringify ( const[foo] concat lcfirst ( const[bar] ) )
*/
/*
SAVEI32(PL_lex_casemods);
SAVEI32(PL_lex_starts);
SAVEI8(PL_lex_state);
+ SAVEPPTR(PL_sublex_info.re_eval_start);
SAVEVPTR(PL_lex_inpat);
SAVEI16(PL_lex_inwhat);
SAVECOPLINE(PL_curcop);
PL_linestr = PL_lex_stuff;
PL_lex_stuff = NULL;
+ PL_sublex_info.re_eval_start = NULL;
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
= SvPVX(PL_linestr);
/*
scan_const
- Extracts a pattern, double-quoted string, or transliteration. This
- is terrifying code.
+ Extracts the next constant part of a pattern, double-quoted string,
+ or transliteration. This is terrifying code.
+
+ For example, in parsing the double-quoted string "ab\x63$d", it would
+ stop at the '$' and return an OP_CONST containing 'abc'.
It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
processing a pattern (PL_lex_inpat is true), a transliteration
Returns a pointer to the character scanned up to. If this is
advanced from the start pointer supplied (i.e. if anything was
- successfully parsed), will leave an OP for the substring scanned
+ successfully parsed), will leave an OP_CONST for the substring scanned
in pl_yylval. Caller must intuit reason for not parsing further
by looking at the next characters herself.
In patterns:
- backslashes:
- constants: \N{NAME} only
- case and quoting: \U \Q \E
- stops on @ and $, but not for $ as tail anchor
+ expand:
+ \N{ABC} => \N{U+41.42.43}
+
+ pass through:
+ all other \-char, including \N and \N{ apart from \N{ABC}
+
+ stops on:
+ @ and $ where it appears to be a var, but not for $ as tail anchor
+ \l \L \u \U \Q \E
+ (?{ or (??{
+
In transliterations:
characters are VERY literal, except for - not at the start or end
it's a tail anchor if $ is the last thing in the string, or if it's
followed by one of "()| \r\n\t"
- \1 (backreferences) are turned into $1
+ \1 (backreferences) are turned into $1 in substitutions
The structure of the code is
while (there's a character to process) {
register char *d = SvPVX(sv); /* destination for copies */
bool dorange = FALSE; /* are we in a translit range? */
bool didrange = FALSE; /* did we just finish a range? */
+ bool in_charclass = FALSE; /* within /[...]/ */
bool has_utf8 = FALSE; /* Output constant is UTF8 */
bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
to be UTF8? But, this can
/* if we get here, we're not doing a transliteration */
- /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
- except for the last char, which will be done separately. */
+ else if (*s == '[' && PL_lex_inpat && !in_charclass) {
+ char *s1 = s-1;
+ int esc = 0;
+ while (s1 >= start && *s1-- == '\\')
+ esc = !esc;
+ if (!esc)
+ in_charclass = TRUE;
+ }
+
+ else if (*s == ']' && PL_lex_inpat && in_charclass) {
+ char *s1 = s-1;
+ int esc = 0;
+ while (s1 >= start && *s1-- == '\\')
+ esc = !esc;
+ if (!esc)
+ in_charclass = FALSE;
+ }
+
+ /* skip for regexp comments /(?#comment)/, except for the last
+ * char, which will be done separately.
+ * Stop on (?{..}) and friends */
+
else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
if (s[2] == '#') {
while (s+1 < send && *s != ')')
*d++ = NATIVE_TO_NEED(has_utf8,*s++);
}
- else if (s[2] == '{' /* This should match regcomp.c */
- || (s[2] == '?' && s[3] == '{'))
+ else if (!PL_lex_casemods && !in_charclass &&
+ ( s[2] == '{' /* This should match regcomp.c */
+ || (s[2] == '?' && s[3] == '{')))
{
- I32 count = 1;
- char *regparse = s + (s[2] == '{' ? 3 : 4);
- char c;
-
- while (count && (c = *regparse)) {
- if (c == '\\' && regparse[1])
- regparse++;
- else if (c == '{')
- count++;
- else if (c == '}')
- count--;
- regparse++;
- }
- if (*regparse != ')')
- regparse--; /* Leave one char for continuation. */
- while (s < regparse)
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+ break;
}
}
*d++ = NATIVE_TO_NEED(has_utf8,*s++);
}
+ /* no further processing of single-quoted regex */
+ else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
+ goto default_action;
+
/* check for embedded arrays
(@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
*/
/* FALL THROUGH */
default:
{
- if ((isALPHA(*s) || isDIGIT(*s)))
+ if ((isALNUMC(*s)))
Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
"Unrecognized escape \\%c passed through",
*s);
/* eg. \x24 indicates the hex constant 0x24 */
case 'x':
- ++s;
- if (*s == '{') {
- char* const e = strchr(s, '}');
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
- PERL_SCAN_DISALLOW_PREFIX;
+ {
STRLEN len;
+ const char* error;
- ++s;
- if (!e) {
- yyerror("Missing right brace on \\x{}");
+ bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
+ s += len;
+ if (! valid) {
+ yyerror(error);
continue;
}
- len = e - s;
- uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
- s = e + 1;
- }
- else {
- {
- STRLEN len = 2;
- I32 flags = PERL_SCAN_DISALLOW_PREFIX;
- uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
- s += len;
- }
}
NUM_ESCAPE_INSERT:
} else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
type = "s";
typelen = 1;
+ } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
+ type = "q";
+ typelen = 1;
} else {
type = "qq";
typelen = 2;
*
* First argument is the stuff after the first token, e.g. "bar".
*
- * Not a method if bar is a filehandle.
+ * Not a method if foo is a filehandle.
* Not a method if foo is a subroutine prototyped to take a filehandle.
* Not a method if it's really "Foo $bar"
* Method if it's "foo $bar"
PERL_ARGS_ASSERT_INTUIT_METHOD;
- if (gv) {
- if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
+ if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
return 0;
- if (cv) {
- if (SvPOK(cv)) {
+ if (cv && SvPOK(cv)) {
const char *proto = CvPROTO(cv);
if (proto) {
if (*proto == ';')
if (*proto == '*')
return 0;
}
- }
- } else
- gv = NULL;
}
s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
/* start is the beginning of the possible filehandle/object,
*/
if (*start == '$') {
- if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
+ if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
isUPPER(*PL_tokenbuf))
return 0;
#ifdef PERL_MAD
if (indirgv && GvCVu(indirgv))
return 0;
/* filehandle or package name makes it a method */
- if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
+ if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
#ifdef PERL_MAD
soff = s - SvPVX(PL_linestr);
#endif
if (PL_expect != XSTATE)
yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
is_use ? "use" : "no"));
+ PL_expect = XTERM;
s = SKIPSPACE1(s);
if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
s = force_version(s, TRUE);
register char *s = PL_bufptr;
register char *d;
STRLEN len;
- bool bof = FALSE;
+ bool bof = FALSE, formbrack = FALSE;
U32 fake_eof = 0;
/* orig_keyword, gvp, and gv are initialized here because
case LEX_INTERPSTART:
if (PL_bufptr == PL_bufend)
return REPORT(sublex_done());
- DEBUG_T({ PerlIO_printf(Perl_debug_log,
+ DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
"### Interpolated variable\n"); });
PL_expect = XTERM;
PL_lex_dojoin = (*PL_bufptr == '@');
NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
force_next(FUNC);
}
+ /* Convert (?{...}) and friends to 'do {...}' */
+ if (PL_lex_inpat && *PL_bufptr == '(') {
+ PL_sublex_info.re_eval_start = PL_bufptr;
+ PL_bufptr += 2;
+ if (*PL_bufptr != '{')
+ PL_bufptr++;
+ start_force(PL_curforce);
+ /* XXX probably need a CURMAD(something) here */
+ PL_expect = XTERMBLOCK;
+ force_next(DO);
+ }
+
if (PL_lex_starts++) {
s = PL_bufptr;
#ifdef PERL_MAD
Perl_croak(aTHX_ "Bad evalled substitution pattern");
PL_lex_repl = NULL;
}
+ if (PL_sublex_info.re_eval_start) {
+ if (*PL_bufptr != ')')
+ Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
+ PL_bufptr++;
+ /* having compiled a (?{..}) expression, return the original
+ * text too, as a const */
+ start_force(PL_curforce);
+ /* XXX probably need a CURMAD(something) here */
+ NEXTVAL_NEXTTOKE.opval =
+ (OP*)newSVOP(OP_CONST, 0,
+ newSVpvn(PL_sublex_info.re_eval_start,
+ PL_bufptr - PL_sublex_info.re_eval_start));
+ force_next(THING);
+ PL_sublex_info.re_eval_start = NULL;
+ PL_expect = XTERM;
+ return REPORT(',');
+ }
+
/* FALLTHROUGH */
case LEX_INTERPCONCAT:
#ifdef DEBUGGING
if (PL_bufptr == PL_bufend)
return REPORT(sublex_done());
- if (SvIVX(PL_linestr) == '\'') {
+ /* m'foo' still needs to be parsed for possible (?{...}) */
+ if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
SV *sv = newSVsv(PL_linestr);
- if (!PL_lex_inpat)
- sv = tokeq(sv);
- else if ( PL_hints & HINT_NEW_RE )
- sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
+ sv = tokeq(sv);
pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
s = PL_bufend;
}
return yylex();
case LEX_FORMLINE:
- PL_lex_state = LEX_NORMAL;
+ PL_lex_state = PL_parser->form_lex_state;
s = scan_formline(PL_bufptr);
if (!PL_lex_formbrack)
+ {
+ formbrack = TRUE;
goto rightbracket;
+ }
OPERATOR(';');
}
}
sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
if (*d == '(') {
- d = scan_str(d,TRUE,TRUE);
+ d = scan_str(d,TRUE,TRUE,FALSE);
if (!d) {
/* MUST advance bufptr here to avoid bogus
"at end of line" context messages from yyerror().
pl_yylval.ival = CopLINE(PL_curcop);
if (isSPACE(*s) || *s == '#')
PL_copline = NOLINE; /* invalidate current command line number */
- TOKEN('{');
+ TOKEN(formbrack ? '=' : '{');
case '}':
if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
TOKEN(0);
else
PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
PL_lex_allbrackets--;
- if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
- PL_lex_formbrack = 0;
if (PL_lex_state == LEX_INTERPNORMAL) {
if (PL_lex_brackets == 0) {
if (PL_expect & XFAKEBRACK) {
curmad('X', newSVpvn(s-1,1));
CURMAD('_', PL_thiswhite);
}
- force_next('}');
+ force_next(formbrack ? '.' : '}');
+ if (formbrack) LEAVE;
#ifdef PERL_MAD
if (!PL_thistoken)
PL_thistoken = newSVpvs("");
if (*t == '\n' || *t == '#') {
s--;
PL_expect = XBLOCK;
+ formbrack = TRUE;
+ ENTER;
+ SAVEI8(PL_parser->form_lex_state);
+ PL_parser->form_lex_state = PL_lex_state;
goto leftbracket;
}
}
{
PL_lex_formbrack = 0;
PL_expect = XSTATE;
+ formbrack = TRUE;
goto rightbracket;
}
if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
TERM(THING);
case '\'':
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '"':
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
TERM(sublex_start());
case '`':
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
if (PL_expect == XOPERATOR)
no_op("Backticks",s);
op_free(rv2cv_op);
SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
- pl_yylval.opval->op_private = 0;
+ pl_yylval.opval->op_private = OPpCONST_FOLDED;
pl_yylval.opval->op_flags |= OPf_SPECIAL;
TOKEN(WORD);
}
case KEY_CORE:
if (*s == ':' && s[1] == ':') {
- s += 2;
+ STRLEN olen = len;
d = s;
+ s += 2;
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
- if (!(tmp = keyword(PL_tokenbuf, len, 1)))
+ if ((*s == ':' && s[1] == ':')
+ || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
+ {
+ s = d;
+ len = olen;
+ Copy(PL_bufptr, PL_tokenbuf, olen, char);
+ goto just_a_word;
+ }
+ if (!tmp)
Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
SVfARG(newSVpvn_flags(PL_tokenbuf, len,
(UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
s = SKIPSPACE1(s);
if (*s == '{')
PRETERMBLOCK(DO);
- if (*s != '\'')
- s = force_word(s,WORD,TRUE,TRUE,FALSE);
+ if (*s != '\'') {
+ d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, 1, &len);
+ if (len) {
+ d = SKIPSPACE1(d);
+ if (*d == '(') s = force_word(s,WORD,TRUE,TRUE,FALSE);
+ }
+ }
if (orig_keyword == KEY_do) {
orig_keyword = 0;
pl_yylval.ival = 1;
case KEY_no:
s = tokenize_use(0, s);
- OPERATOR(USE);
+ TERM(USE);
case KEY_not:
if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
LOP(OP_PIPE_OP,XTERM);
case KEY_q:
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_CONST;
case KEY_qw: {
OP *words = NULL;
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
PL_expect = XOPERATOR;
}
case KEY_qq:
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
pl_yylval.ival = OP_STRINGIFY;
TERM(sublex_start());
case KEY_qx:
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
missingterm(NULL);
readpipe_override();
}
if (key == KEY_format) {
- if (*s == '=')
- PL_lex_formbrack = PL_lex_brackets + 1;
+ PL_lex_formbrack = PL_lex_brackets + 1;
#ifdef PERL_MAD
PL_thistoken = subtoken;
s = d;
const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
STRLEN tmplen;
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
/* strip spaces and check for bad characters */
S_pending_ident(pTHX)
{
dVAR;
- register char *d;
PADOFFSET tmp = 0;
/* pit holds the identifier we read and pending_ident is reset */
char pit = PL_pending_ident;
/*
build the ops for accesses to a my() variable.
-
- Deny my($a) or my($b) in a sort block, *if* $a or $b is
- then used in a comparison. This catches most, but not
- all cases. For instance, it catches
- sort { my($a); $a <=> $b }
- but not
- sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
- (although why you'd do that is anyone's guess).
*/
if (!has_colon) {
return WORD;
}
- /* if it's a sort block and they're naming $a or $b */
- if (PL_last_lop_op == OP_SORT &&
- PL_tokenbuf[0] == '$' &&
- (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
- && !PL_tokenbuf[2])
- {
- for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
- d < PL_bufend && *d != '\n';
- d++)
- {
- if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
- Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
- PL_tokenbuf);
- }
- }
- }
-
pl_yylval.opval = newOP(OP_PADANY, 0);
pl_yylval.opval->op_targ = tmp;
return PRIVATEREF;
bracket = s;
s++;
}
- else if (ck_uni)
- check_uni();
if (s < send) {
if (UTF) {
const STRLEN skip = UTF8SKIP(s);
*d = toCTRL(*s);
s++;
}
+ else if (ck_uni && !bracket)
+ check_uni();
if (bracket) {
if (isSPACE(s[-1])) {
while (s < send) {
/* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
* the parse starting at 's', based on the subset that are valid in this
* context input to this routine in 'valid_flags'. Advances s. Returns
- * TRUE if the input was a valid flag, so the next char may be as well;
- * otherwise FALSE. 'charset' should point to a NUL upon first call on the
- * current regex. This routine will set it to any charset modifier found.
- * The caller shouldn't change it. This way, another charset modifier
- * encountered in the parse can be detected as an error, as we have decided
- * allow only one */
+ * TRUE if the input should be treated as a valid flag, so the next char
+ * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
+ * first call on the current regex. This routine will set it to any
+ * charset modifier found. The caller shouldn't change it. This way,
+ * another charset modifier encountered in the parse can be detected as an
+ * error, as we have decided to allow only one */
const char c = **s;
-
- if (! strchr(valid_flags, c)) {
- if (isALNUM(c)) {
- goto deprecate;
+ STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
+
+ if ( charlen != 1 || ! strchr(valid_flags, c) ) {
+ if (isALNUM_lazy_if(*s, UTF)) {
+ yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
+ UTF ? SVf_UTF8 : 0);
+ (*s) += charlen;
+ /* Pretend that it worked, so will continue processing before
+ * dieing */
+ return TRUE;
}
return FALSE;
}
case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
case LOCALE_PAT_MOD:
-
- /* In 5.14, qr//lt is legal but deprecated; the 't' means they
- * can't be regex modifiers.
- * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
- * much as easily done. s///lei, for example, has to mean regex
- * modifiers if it's not an error (as does any word character
- * following the 'e'). Otherwise, we resolve to the backwards-
- * compatible, but less likely 's/// le ...', i.e. as meaning
- * less-than-or-equal. The reason it's not likely is that s//
- * returns a number for code in the field (/r returns a string, but
- * that wasn't added until the 5.13 series), and so '<=' should be
- * used for comparing, not 'le'. */
- if (*((*s) + 1) == 't') {
- goto deprecate;
- }
- else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
-
- /* 'e' is valid only for substitutes, s///e. If it is not
- * valid in the current context, then 'm//le' must mean the
- * comparison operator, so use the regular deprecation message.
- */
- if (! strchr(valid_flags, 'e')) {
- goto deprecate;
- }
- Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.18, it will be resolved the other way");
- return FALSE;
- }
if (*charset) {
goto multiple_charsets;
}
*charset = c;
break;
case UNICODE_PAT_MOD:
- /* In 5.14, qr//unless and qr//until are legal but deprecated; the
- * 'n' means they can't be regex modifiers */
- if (*((*s) + 1) == 'n') {
- goto deprecate;
- }
if (*charset) {
goto multiple_charsets;
}
*charset = c;
break;
case ASCII_RESTRICT_PAT_MOD:
- /* In 5.14, qr//and is legal but deprecated; the 'n' means they
- * can't be regex modifiers */
- if (*((*s) + 1) == 'n') {
- goto deprecate;
- }
-
if (! *charset) {
set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
}
(*s)++;
return TRUE;
- deprecate:
- Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
- "Having no space between pattern and following word is deprecated");
- return FALSE;
-
multiple_charsets:
if (*charset != c) {
yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
{
dVAR;
PMOP *pm;
- char *s = scan_str(start,!!PL_madskills,FALSE);
+ char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
const char * const valid_flags =
(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
char charset = '\0'; /* character set modifier */
PERL_ARGS_ASSERT_SCAN_PAT;
+ /* this was only needed for the initial scan_str; set it to false
+ * so that any (?{}) code blocks etc are parsed normally */
+ PL_reg_state.re_reparsing = FALSE;
if (!s) {
const char * const delimiter = skipspace(start);
Perl_croak(aTHX_
#ifdef PERL_MAD
modstart = s;
#endif
+
+ /* if qr/...(?{..}).../, then need to parse the pattern within a new
+ * anon CV. False positives like qr/[(?{]/ are harmless */
+
+ if (type == OP_QR) {
+ STRLEN len;
+ char *e, *p = SvPV(PL_lex_stuff, len);
+ e = p + len;
+ for (; p < e; p++) {
+ if (p[0] == '(' && p[1] == '?'
+ && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
+ {
+ pm->op_pmflags |= PMf_HAS_CV;
+ break;
+ }
+ }
+ pm->op_pmflags |= PMf_IS_QR;
+ }
+
while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
#ifdef PERL_MAD
if (PL_madskills && modstart != s) {
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Substitution pattern not terminated");
#endif
first_start = PL_multi_start;
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
pl_yylval.ival = OP_NULL;
- s = scan_str(start,!!PL_madskills,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Transliteration pattern not terminated");
}
#endif
- s = scan_str(s,!!PL_madskills,FALSE);
+ s = scan_str(s,!!PL_madskills,FALSE,FALSE);
if (!s) {
if (PL_lex_stuff) {
SvREFCNT_dec(PL_lex_stuff);
s = peek;
term = *s++;
s = delimcpy(d, e, s, PL_bufend, term, &len);
+ if (s == PL_bufend)
+ Perl_croak(aTHX_ "Unterminated delimiter for here document");
d += len;
- if (s < PL_bufend)
- s++;
+ s++;
}
else {
if (*s == '\\')
if (d - PL_tokenbuf != len) {
pl_yylval.ival = OP_GLOB;
- s = scan_str(start,!!PL_madskills,FALSE);
+ s = scan_str(start,!!PL_madskills,FALSE,FALSE);
if (!s)
Perl_croak(aTHX_ "Glob not terminated");
return s;
takes: start position in buffer
keep_quoted preserve \ on the embedded delimiter(s)
keep_delims preserve the delimiters around the string
+ re_reparse compiling a run-time /(?{})/:
+ collapse // to /, and skip encoding src
returns: position to continue reading from buffer
side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
updates the read buffer.
*/
STATIC char *
-S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
+S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
{
dVAR;
SV *sv; /* scalar value: string */
}
#endif
for (;;) {
- if (PL_encoding && !UTF) {
+ if (PL_encoding && !UTF && !re_reparse) {
bool cont = TRUE;
while (cont) {
CopLINE_inc(PL_curcop);
/* handle quoted delimiters */
if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
- if (!keep_quoted && s[1] == term)
+ if (!keep_quoted
+ && (s[1] == term
+ || (re_reparse && s[1] == '\\'))
+ )
s++;
- /* any other quotes are simply copied straight through */
+ /* any other quotes are simply copied straight through */
else
*to++ = *s++;
}
/* at this point, we have successfully read the delimited string */
- if (!PL_encoding || UTF) {
+ if (!PL_encoding || UTF || re_reparse) {
#ifdef PERL_MAD
if (PL_madskills) {
char * const tstart = SvPVX(PL_linestr) + stuffstart;
}
}
#endif
- if (has_utf8 || PL_encoding)
+ if (has_utf8 || (PL_encoding && !re_reparse))
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
break;
}
}
- if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
- eol = (char *) memchr(s,'\n',PL_bufend-s);
- if (!eol++)
+ eol = (char *) memchr(s,'\n',PL_bufend-s);
+ if (!eol++)
eol = PL_bufend;
- }
- else
- eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
if (*s != '#') {
for (t = s; t < eol; t++) {
if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
break;
}
s = (char*)eol;
- if (PL_rsfp || PL_parser->filtered) {
+ if ((PL_rsfp || PL_parser->filtered)
+ && PL_parser->form_lex_state == LEX_NORMAL) {
bool got_some;
#ifdef PERL_MAD
if (PL_madskills) {
if (SvCUR(stuff)) {
PL_expect = XTERM;
if (needargs) {
- PL_lex_state = LEX_NORMAL;
+ PL_lex_state = PL_parser->form_lex_state;
start_force(PL_curforce);
NEXTVAL_NEXTTOKE.ival = 0;
force_next(',');
const I32 oldsavestack_ix = PL_savestack_ix;
CV* const outsidecv = PL_compcv;
- if (PL_compcv) {
- assert(SvTYPE(PL_compcv) == SVt_PVCV);
- }
SAVEI32(PL_subline);
save_item(PL_subname);
SAVESPTR(PL_compcv);
return stmtseqop;
}
-void
-Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
-{
- PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
- deprecate("qw(...) as parentheses");
- force_next((4<<24)|')');
- if (qwlist->op_type == OP_STUB) {
- op_free(qwlist);
- }
- else {
- start_force(PL_curforce);
- NEXTVAL_NEXTTOKE.opval = qwlist;
- force_next(THING);
- }
- force_next((2<<24)|'(');
-}
-
/*
* Local variables:
* c-indentation-style: bsd
* c-basic-offset: 4
- * indent-tabs-mode: t
+ * indent-tabs-mode: nil
* End:
*
- * ex: set ts=8 sts=4 sw=4 noet:
+ * ex: set ts=8 sts=4 sw=4 et:
*/