* LOOPX : loop exiting command (goto, last, dump, etc)
* FTST : file test operator
* FUN0 : zero-argument function
+ * FUN0OP : zero-argument function, with its op created in this file
* FUN1 : not used, except for not, which isn't a UNIOP
* BOop : bitwise or or xor
* BAop : bitwise and
#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
+#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
{ FORMAT, TOKENTYPE_NONE, "FORMAT" },
{ FUNC, TOKENTYPE_OPNUM, "FUNC" },
{ FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
+ { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
{ FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
{ FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
{ FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
and I<rsfp> supplies the remainder of the source.
The I<flags> parameter is reserved for future use, and must always
-be zero.
+be zero, except for one flag that is currently reserved for perl's internal
+use.
=cut
*/
+/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
+ can share filters with the current parser. */
+
void
Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
{
const char *s = NULL;
STRLEN len;
yy_parser *parser, *oparser;
- if (flags)
+ if (flags && flags != LEX_START_SAME_FILTER)
Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
/* create and initialise a parser */
parser->lex_state = LEX_NORMAL;
parser->expect = XSTATE;
parser->rsfp = rsfp;
- parser->rsfp_filters = newAV();
+ parser->rsfp_filters =
+ !(flags & LEX_START_SAME_FILTER) || !oparser
+ ? newAV()
+ : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
Newx(parser->lex_brackstack, 120, char);
Newx(parser->lex_casestack, 12, char);
if (c != -1) {
if (c == '\n')
CopLINE_inc(PL_curcop);
- PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
+ if (UTF)
+ PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
+ else
+ ++(PL_parser->bufptr);
}
return c;
}
const char *t;
const char *n;
const char *e;
+ line_t line_num;
PERL_ARGS_ASSERT_INCLINE;
if (*e != '\n' && *e != '\0')
return; /* false alarm */
+ line_num = atoi(n)-1;
+
if (t - s > 0) {
const STRLEN len = t - s;
-#ifndef USE_ITHREADS
SV *const temp_sv = CopFILESV(PL_curcop);
const char *cf;
STRLEN tmplen;
gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
/* adjust ${"::_<newfilename"} to store the new file name */
GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
- GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
- GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
+ /* The line number may differ. If that is the case,
+ alias the saved lines that are in the array.
+ Otherwise alias the whole array. */
+ if (CopLINE(PL_curcop) == line_num) {
+ GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
+ GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
+ }
+ else if (GvAV(*gvp)) {
+ AV * const av = GvAV(*gvp);
+ const I32 start = CopLINE(PL_curcop)+1;
+ I32 items = AvFILLp(av) - start;
+ if (items > 0) {
+ AV * const av2 = GvAVn(gv2);
+ SV **svp = AvARRAY(av) + start;
+ I32 l = (I32)line_num+1;
+ while (items--)
+ av_store(av2, l++, SvREFCNT_inc(*svp++));
+ }
+ }
}
if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
}
if (tmpbuf != smallbuf) Safefree(tmpbuf);
}
-#endif
CopFILE_free(PL_curcop);
CopFILE_setn(PL_curcop, s, len);
}
- CopLINE_set(PL_curcop, atoi(n)-1);
+ CopLINE_set(PL_curcop, line_num);
}
#ifdef PERL_MAD
if (*s) {
const STRLEN len = strlen(s);
- OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
+ OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
+ UTF ? SVf_UTF8 : 0));
start_force(PL_curforce);
NEXTVAL_NEXTTOKE.opval = o;
force_next(WORD);
warnings if the symbol must be introduced in an eval.
GSAR 96-10-12 */
gv_fetchpvn_flags(s, len,
- PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
- : GV_ADD,
+ (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
+ : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
kind == '$' ? SVt_PV :
kind == '@' ? SVt_PVAV :
kind == '%' ? SVt_PVHV :
if (PL_lex_inpat) {
- /* Pass through to the regex compiler unchanged. The
- * reason we evaluated the number above is to make sure
- * there wasn't a syntax error. */
+ /* On non-EBCDIC platforms, pass through to the regex
+ * compiler unchanged. The reason we evaluated the
+ * number above is to make sure there wasn't a syntax
+ * error. But on EBCDIC we convert to native so
+ * downstream code can continue to assume it's native
+ */
s -= 5; /* Include the '\N{U+' */
+#ifdef EBCDIC
+ d += my_snprintf(d, e - s + 1 + 1, /* includes the }
+ and the \0 */
+ "\\N{U+%X}",
+ (unsigned int) UNI_TO_NATIVE(uv));
+#else
Copy(s, d, e - s + 1, char); /* 1 = include the } */
d += e - s + 1;
+#endif
}
else { /* Not a pattern: convert the hex to string */
}
/* Convert first code point to hex, including the
- * boiler plate before it */
+ * boiler plate before it. For all these, we
+ * convert to native format so that downstream code
+ * can continue to assume the input is native */
output_length =
my_snprintf(hex_string, sizeof(hex_string),
- "\\N{U+%X", (unsigned int) uv);
+ "\\N{U+%X",
+ (unsigned int) UNI_TO_NATIVE(uv));
/* Make sure there is enough space to hold it */
d = off + SvGROW(sv, off
output_length =
my_snprintf(hex_string, sizeof(hex_string),
- ".%X", (unsigned int) uv);
+ ".%X",
+ (unsigned int) UNI_TO_NATIVE(uv));
d = off + SvGROW(sv, off
+ output_length
int len;
scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
len = (int)strlen(tmpbuf);
- if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
+ if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
+ UTF ? SVf_UTF8 : 0, SVt_PV))
weight -= 100;
else
weight -= 10;
return 0;
if (cv) {
if (SvPOK(cv)) {
- const char *proto = SvPVX_const(cv);
+ const char *proto = CvPROTO(cv);
if (proto) {
if (*proto == ';')
proto++;
#endif
goto bare_package;
}
- indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
+ indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
if (indirgv && GvCVu(indirgv))
return 0;
/* filehandle or package name makes it a method */
- if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
+ if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
#ifdef PERL_MAD
soff = s - SvPVX(PL_linestr);
#endif
S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
if (PL_madskills)
- curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
+ curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
+ ( UTF ? SVf_UTF8 : 0 )));
PL_expect = XTERM;
force_next(WORD);
PL_bufptr = s;
if (len > 2 &&
(pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
- (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
+ (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
{
return GvHV(gv); /* Foo:: */
}
/* use constant CLASS => 'MyClass' */
- gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
+ gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
if (gv && GvCV(gv)) {
SV * const sv = cv_const_sv(GvCV(gv));
if (sv)
pkgname = SvPV_const(sv, len);
}
- return gv_stashpvn(pkgname, len, 0);
+ return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
}
/*
*(U8*)s == 0xEF ||
*(U8*)s >= 0xFE ||
s[1] == 0)) {
- bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
+ Off_t offset = (IV)PerlIO_tell(PL_rsfp);
+ bof = (offset == (Off_t)SvCUR(PL_linestr));
+#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
+ /* offset may include swallowed CR */
+ if (!bof)
+ bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
+#endif
if (bof) {
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
s = swallow_bom((U8*)s);
break;
}
}
- sv = newSVpvn(s, len);
+ sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
if (*d == '(') {
d = scan_str(d,TRUE,TRUE);
if (!d) {
PREREF('$');
}
- /* This kludge not intended to be bulletproof. */
- if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
- pl_yylval.opval = newSVOP(OP_CONST, 0,
- newSViv(CopARYBASE_get(&PL_compiling)));
- pl_yylval.opval->op_private = OPpCONST_ARYBASE;
- TERM(THING);
- }
-
d = s;
{
const char tmp = *s;
else if (!isALPHA(*start) && (PL_expect == XTERM
|| PL_expect == XREF || PL_expect == XSTATE
|| PL_expect == XTERMORDORDOR)) {
- GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
+ GV *const gv = gv_fetchpvn_flags(s, start - s,
+ UTF ? SVf_UTF8 : 0, SVt_PVCV);
if (!gv) {
s = scan_num(s, &pl_yylval);
TERM(THING);
GV *hgv = NULL; /* hidden (loser) */
if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
CV *cv;
- if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
+ if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
+ UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
(cv = GvCVu(gv)))
{
if (GvIMPORTED_CV(gv))
hgv = gv;
}
if (!ogv &&
- (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
+ (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
+ UTF ? -(I32)len : (I32)len, FALSE)) &&
(gv = *gvp) && isGV_with_GP(gv) &&
GvCVu(gv) && GvIMPORTED_CV(gv))
{
PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
{
if (ckWARN(WARN_BAREWORD)
- && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
+ && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
"Bareword \"%s\" refers to nonexistent package",
PL_tokenbuf);
constants that might already be there into full
blown PVGVs with attached PVCV. */
gv = gv_fetchpvn_flags(PL_tokenbuf, len,
- GV_NOADD_NOINIT, SVt_PVCV);
+ GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
+ SVt_PVCV);
}
len = 0;
}
goto safe_bareword;
{
- OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
+ OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
const_op->op_private = OPpCONST_BARE;
rv2cv_op = newCVREF(0, const_op);
}
#endif
SvPOK(cv))
{
- STRLEN protolen;
- const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
+ STRLEN protolen = CvPROTOLEN(cv);
+ const char *proto = CvPROTO(cv);
if (!protolen)
TERM(FUNC0SUB);
while (*proto == ';')
}
}
if (probable_sub) {
- gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
+ gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
+ SVt_PVCV);
op_free(pl_yylval.opval);
pl_yylval.opval = rv2cv_op;
pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
d = PL_tokenbuf;
while (isLOWER(*d))
d++;
- if (!*d && !gv_stashpv(PL_tokenbuf, 0))
+ if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
PL_tokenbuf);
}
}
case KEY___FILE__:
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
- newSVpv(CopFILE(PL_curcop),0));
- TERM(THING);
+ FUN0OP(
+ (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
+ );
case KEY___LINE__:
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
- Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
- TERM(THING);
+ FUN0OP(
+ (OP*)newSVOP(OP_CONST, 0,
+ Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
+ );
case KEY___PACKAGE__:
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
+ FUN0OP(
+ (OP*)newSVOP(OP_CONST, 0,
(PL_curstash
? newSVhek(HvNAME_HEK(PL_curstash))
- : &PL_sv_undef));
- TERM(THING);
+ : &PL_sv_undef))
+ );
case KEY___DATA__:
case KEY___END__: {
GV *gv;
if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
const char *pname = "main";
+ STRLEN plen = 4;
+ U32 putf8 = 0;
if (PL_tokenbuf[2] == 'D')
- pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
- gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
- SVt_PVIO);
+ {
+ HV * const stash =
+ PL_curstash ? PL_curstash : PL_defstash;
+ pname = HvNAME_get(stash);
+ plen = HvNAMELEN (stash);
+ if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
+ }
+ gv = gv_fetchpvn_flags(
+ Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
+ plen+6, GV_ADD|putf8, SVt_PVIO
+ );
GvMULTI_on(gv);
if (!GvIO(gv))
GvIOp(gv) = newIO();
#else
if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
#endif /* NETWARE */
-#ifdef PERLIO_IS_STDIO /* really? */
-# if defined(__BORLANDC__)
- /* XXX see note in do_binmode() */
- ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
-# endif
-#endif
if (loc > 0)
PerlIO_seek(PL_rsfp, loc, 0);
}
s += 2;
d = s;
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
- if (!(tmp = keyword(PL_tokenbuf, len, 0)))
+ if (!(tmp = keyword(PL_tokenbuf, len, 1)))
Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
if (tmp < 0)
tmp = -tmp;
- else if (tmp == KEY_require || tmp == KEY_do)
+ else if (tmp == KEY_require || tmp == KEY_do
+ || tmp == KEY_glob)
/* that's a way to remember we saw "CORE::" */
orig_keyword = tmp;
goto reserved_word;
UNI(OP_CHOP);
case KEY_continue:
- /* When 'use switch' is in effect, continue has a dual
- life as a control operator. */
- {
- if (!FEATURE_IS_ENABLED("switch"))
- PREBLOCK(CONTINUE);
- else {
/* We have to disambiguate the two senses of
"continue". If the next token is a '{' then
treat it as the start of a continue block;
PREBLOCK(CONTINUE);
else
FUN0(OP_CONTINUE);
- }
- }
case KEY_chdir:
/* may use HOME */
OPERATOR(GIVEN);
case KEY_glob:
- LOP(OP_GLOB,XTERM);
+ LOP(
+ orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
+ XTERM
+ );
case KEY_hex:
UNI(OP_HEX);
missingterm(NULL);
PL_expect = XOPERATOR;
if (SvCUR(PL_lex_stuff)) {
- int warned = 0;
+ int warned_comma = !ckWARN(WARN_QW);
+ int warned_comment = warned_comma;
d = SvPV_force(PL_lex_stuff, len);
while (len) {
for (; isSPACE(*d) && len; --len, ++d)
if (len) {
SV *sv;
const char *b = d;
- if (!warned && ckWARN(WARN_QW)) {
+ if (!warned_comma || !warned_comment) {
for (; !isSPACE(*d) && len; --len, ++d) {
- if (*d == ',') {
+ if (!warned_comma && *d == ',') {
Perl_warner(aTHX_ packWARN(WARN_QW),
"Possible attempt to separate words with commas");
- ++warned;
+ ++warned_comma;
}
- else if (*d == '#') {
+ else if (!warned_comment && *d == '#') {
Perl_warner(aTHX_ packWARN(WARN_QW),
"Possible attempt to put comments in qw() list");
- ++warned;
+ ++warned_comment;
}
}
}
*PL_tokenbuf = '\0';
s = force_word(s,WORD,TRUE,TRUE,FALSE);
if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
- gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
+ gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
+ GV_ADD | (UTF ? SVf_UTF8 : 0));
else if (*s == '<')
yyerror("<> should be quotes");
}
SV *tmpwhite = 0;
char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
- SV *subtoken = newSVpvn(tstart, s - tstart);
+ SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
PL_thistoken = 0;
d = s;
d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
#ifdef PERL_MAD
if (PL_madskills)
- nametoke = newSVpvn(s, d - s);
+ nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
#endif
if (memchr(tmpbuf, ':', len))
sv_setpvn(PL_subname, tmpbuf, len);
sv_catpvs(PL_subname,"::");
sv_catpvn(PL_subname,tmpbuf,len);
}
+ if (SvUTF8(PL_linestr))
+ SvUTF8_on(PL_subname);
have_name = TRUE;
#ifdef PERL_MAD
bool underscore = FALSE;
bool seen_underscore = FALSE;
const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
+ STRLEN tmplen;
s = scan_str(s,!!PL_madskills,FALSE);
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
/* strip spaces and check for bad characters */
- d = SvPVX(PL_lex_stuff);
+ d = SvPV(PL_lex_stuff, tmplen);
tmp = 0;
- for (p = d; *p; ++p) {
+ for (p = d; tmplen; tmplen--, ++p) {
if (!isSPACE(*p)) {
- d[tmp++] = *p;
+ d[tmp++] = *p;
if (warnillegalproto) {
if (must_be_last)
proto_after_greedy_proto = TRUE;
- if (!strchr("$@%*;[]&\\_+", *p)) {
+ if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
bad_proto = TRUE;
}
else {
}
}
}
- d[tmp] = '\0';
+ d[tmp] = '\0';
if (proto_after_greedy_proto)
Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
"Prototype after '%c' for %"SVf" : %s",
greedy_proto, SVfARG(PL_subname), d);
- if (bad_proto)
+ if (bad_proto) {
+ SV *dsv = newSVpvs_flags("", SVs_TEMP);
Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
"Illegal character %sin prototype for %"SVf" : %s",
seen_underscore ? "after '_' " : "",
- SVfARG(PL_subname), d);
- SvCUR_set(PL_lex_stuff, tmp);
+ SVfARG(PL_subname),
+ sv_uni_display(dsv,
+ newSVpvn_flags(d, tmp, SVs_TEMP | SvUTF8(PL_lex_stuff)),
+ tmp, UNI_DISPLAY_ISPRINT));
+ }
+ SvCUR_set(PL_lex_stuff, tmp);
have_proto = TRUE;
#ifdef PERL_MAD
yyerror(Perl_form(aTHX_ "No package name allowed for "
"variable %s in \"our\"",
PL_tokenbuf));
- tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
+ tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
}
else {
if (has_colon)
PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
pl_yylval.opval = newOP(OP_PADANY, 0);
- pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
+ pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
+ UTF ? SVf_UTF8 : 0);
return PRIVATEREF;
}
}
if (!has_colon) {
if (!PL_in_my)
- tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
+ tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
+ UTF ? SVf_UTF8 : 0);
if (tmp != NOT_IN_PAD) {
/* might be an "our" variable" */
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
HEK * const stashname = HvNAME_HEK(stash);
SV * const sym = newSVhek(stashname);
sv_catpvs(sym, "::");
- sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
+ sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
pl_yylval.opval->op_private = OPpCONST_ENTERED;
gv_fetchsv(sym,
*/
if (ckWARN(WARN_AMBIGUOUS) &&
pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
- GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
- SVt_PVAV);
+ GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
+ ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
/* DO NOT warn for @- and @+ */
&& !( PL_tokenbuf[2] == '\0' &&
}
/* build ops for a bareword */
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
- tokenbuf_len - 1));
+ pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
+ tokenbuf_len - 1,
+ UTF ? SVf_UTF8 : 0 ));
pl_yylval.opval->op_private = OPpCONST_ENTERED;
gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
- PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
+ (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
+ | ( UTF ? SVf_UTF8 : 0 ),
((PL_tokenbuf[0] == '$') ? SVt_PV
: (PL_tokenbuf[0] == '@') ? SVt_PVAV
: SVt_PVHV));
if (keyword(w, s - w, 0))
return;
- gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
+ gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
if (gv && GvCVu(gv))
return;
Perl_croak(aTHX_ "No comma allowed after %s", what);
}
else if (ck_uni)
check_uni();
- if (s < send)
- *d = *s++;
- d[1] = '\0';
+ if (s < send) {
+ if (UTF) {
+ const STRLEN skip = UTF8SKIP(s);
+ STRLEN i;
+ d[skip] = '\0';
+ for ( i = 0; i < skip; i++ )
+ d[i] = *s++;
+ }
+ else {
+ *d = *s++;
+ d[1] = '\0';
+ }
+ }
if (*d == '^' && *s && isCONTROLVAR(*s)) {
*d = toCTRL(*s);
s++;
}
}
if (isIDFIRST_lazy_if(d,UTF)) {
- d++;
+ d += UTF8SKIP(d);
if (UTF) {
char *end = s;
while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
}
static bool
-S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) {
+S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
/* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
* the parse starting at 's', based on the subset that are valid in this
* context input to this routine in 'valid_flags'. Advances s. Returns
* TRUE if the input was a valid flag, so the next char may be as well;
- * otherwise FALSE */
+ * otherwise FALSE. 'charset' should point to a NUL upon first call on the
+ * current regex. This routine will set it to any charset modifier found.
+ * The caller shouldn't change it. This way, another charset modifier
+ * encountered in the parse can be detected as an error, as we have decided
+ * allow only one */
const char c = **s;
"Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way");
return FALSE;
}
+ if (*charset) {
+ goto multiple_charsets;
+ }
set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
+ *charset = c;
break;
case UNICODE_PAT_MOD:
/* In 5.14, qr//unless and qr//until are legal but deprecated; the
if (*((*s) + 1) == 'n') {
goto deprecate;
}
+ if (*charset) {
+ goto multiple_charsets;
+ }
set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
+ *charset = c;
break;
case ASCII_RESTRICT_PAT_MOD:
/* In 5.14, qr//and is legal but deprecated; the 'n' means they
if (*((*s) + 1) == 'n') {
goto deprecate;
}
- if (*((*s) + 1) == ASCII_RESTRICT_PAT_MOD) {
- /* Doubled modifier implies more restricted */
- set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
- (*s)++;
+
+ if (! *charset) {
+ set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
}
else {
- set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
+
+ /* Error if previous modifier wasn't an 'a', but if it was, see
+ * if, and accept, a second occurrence (only) */
+ if (*charset != 'a'
+ || get_regex_charset(*pmfl)
+ != REGEX_ASCII_RESTRICTED_CHARSET)
+ {
+ goto multiple_charsets;
+ }
+ set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
}
+ *charset = c;
break;
case DEPENDS_PAT_MOD:
+ if (*charset) {
+ goto multiple_charsets;
+ }
set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
+ *charset = c;
break;
}
Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
"Having no space between pattern and following word is deprecated");
return FALSE;
+
+ multiple_charsets:
+ if (*charset != c) {
+ yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
+ }
+ else if (c == 'a') {
+ yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
+ }
+ else {
+ yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
+ }
+
+ /* Pretend that it worked, so will continue processing before dieing */
+ (*s)++;
+ return TRUE;
}
STATIC char *
char *s = scan_str(start,!!PL_madskills,FALSE);
const char * const valid_flags =
(const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
+ char charset = '\0'; /* character set modifier */
#ifdef PERL_MAD
char *modstart;
#endif
#ifdef PERL_MAD
modstart = s;
#endif
- while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s)) {};
+ while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
#ifdef PERL_MAD
if (PL_madskills && modstart != s) {
SV* tmptoken = newSVpvn(modstart, s - modstart);
register PMOP *pm;
I32 first_start;
I32 es = 0;
+ char charset = '\0'; /* character set modifier */
#ifdef PERL_MAD
char *modstart;
#endif
s++;
es++;
}
- else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s)) {
+ else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
+ {
break;
}
}
if (*s == term && memEQ(s,PL_tokenbuf,len)) {
STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
*(SvPVX(PL_linestr) + off ) = ' ';
+ lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
sv_catsv(PL_linestr,herewas);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
/* allow <Pkg'VALUE> or <Pkg::VALUE> */
while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
- d++;
+ d += UTF ? UTF8SKIP(d) : 1;
/* If we've tried to read what we allow filehandles to look like, and
there's still text left, then it must be a glob() and not a getline.
/* try to find it in the pad for this block, otherwise find
add symbol table ops
*/
- const PADOFFSET tmp = pad_findmy(d, len, 0);
+ const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
if (tmp != NOT_IN_PAD) {
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
gv = gv_fetchpv(d,
(PL_in_eval
? (GV_ADDMULTI | GV_ADDINEVAL)
- : GV_ADDMULTI),
+ : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
SVt_PV);
PL_lex_op = readline_overriden
? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
/* If it's none of the above, it must be a literal filehandle
(<Foo::BAR> or <FOO>) so build a simple readline OP */
else {
- GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
+ GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
PL_lex_op = readline_overriden
? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
op_append_elem(OP_LIST,