#define PL_lex_brackstack (PL_parser->lex_brackstack)
#define PL_lex_casemods (PL_parser->lex_casemods)
#define PL_lex_casestack (PL_parser->lex_casestack)
-#define PL_lex_defer (PL_parser->lex_defer)
#define PL_lex_dojoin (PL_parser->lex_dojoin)
#define PL_lex_formbrack (PL_parser->lex_formbrack)
#define PL_lex_inpat (PL_parser->lex_inpat)
#define PL_multi_open (PL_parser->multi_open)
#define PL_multi_close (PL_parser->multi_close)
#define PL_preambled (PL_parser->preambled)
-#define PL_sublex_info (PL_parser->sublex_info)
#define PL_linestr (PL_parser->linestr)
#define PL_expect (PL_parser->expect)
#define PL_copline (PL_parser->copline)
# define PL_nexttype (PL_parser->nexttype)
# define PL_nextval (PL_parser->nextval)
+
+#define SvEVALED(sv) \
+ (SvTYPE(sv) >= SVt_PVNV \
+ && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
+
static const char* const ident_too_long = "Identifier too long";
# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
string or after \E, $foo, etc */
#define LEX_INTERPCONST 2 /* NOT USED */
#define LEX_FORMLINE 1 /* expecting a format line */
-#define LEX_KNOWNEXT 0 /* next token known; just return it */
#ifdef DEBUGGING
#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
#define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
-#define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \
+#define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
pl_yylval.ival=f, \
PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
REPORT((int)LOOPEX))
if (have_x) PL_expect = x; \
PL_bufptr = s; \
PL_last_uni = PL_oldbufptr; \
- PL_last_lop_op = f; \
+ PL_last_lop_op = (f) < 0 ? -(f) : (f); \
if (*s == '(') \
return REPORT( (int)FUNC1 ); \
s = skipspace(s); \
{ USE, TOKENTYPE_IVAL, "USE" },
{ WHEN, TOKENTYPE_IVAL, "WHEN" },
{ WHILE, TOKENTYPE_IVAL, "WHILE" },
- { WORD, TOKENTYPE_OPVAL, "WORD" },
+ { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" },
{ YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
{ 0, TOKENTYPE_NONE, NULL }
};
STATIC void
S_missingterm(pTHX_ char *s)
{
- char tmpbuf[3];
+ char tmpbuf[UTF8_MAXBYTES + 1];
char q;
+ bool uni = FALSE;
+ SV *sv;
if (s) {
char * const nl = strrchr(s,'\n');
if (nl)
*nl = '\0';
+ uni = UTF;
}
- else if ((U8) PL_multi_close < 32) {
+ else if (PL_multi_close < 32) {
*tmpbuf = '^';
tmpbuf[1] = (char)toCTRL(PL_multi_close);
tmpbuf[2] = '\0';
s = tmpbuf;
}
else {
- *tmpbuf = (char)PL_multi_close;
- tmpbuf[1] = '\0';
+ if (LIKELY(PL_multi_close < 256)) {
+ *tmpbuf = (char)PL_multi_close;
+ tmpbuf[1] = '\0';
+ }
+ else {
+ uni = TRUE;
+ *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0;
+ }
s = tmpbuf;
}
q = strchr(s,'"') ? '\'' : '"';
- Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
+ sv = sv_2mortal(newSVpv(s,0));
+ if (uni)
+ SvUTF8_on(sv);
+ Perl_croak(aTHX_ "Can't find string terminator %c%"SVf
+ "%c anywhere before EOF",q,SVfARG(sv),q);
}
#include "feature.h"
parser->linestr = flags & LEX_START_COPIED
? SvREFCNT_inc_simple_NN(line)
: newSVpvn_flags(s, len, SvUTF8(line));
- sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
+ if (!rsfp)
+ sv_catpvs(parser->linestr, "\n;");
} else {
parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
}
PerlIO_close(parser->rsfp);
SvREFCNT_dec(parser->rsfp_filters);
SvREFCNT_dec(parser->lex_stuff);
- SvREFCNT_dec(parser->sublex_info.repl);
+ SvREFCNT_dec(parser->lex_sub_repl);
Safefree(parser->lex_brackstack);
Safefree(parser->lex_casestack);
}
else {
assert(p < e -1 );
- *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
+ *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
p += 2;
}
}
got_some = 0;
} else {
if (!SvPOK(linestr)) /* can get undefined by filter_gets */
- sv_setpvs(linestr, "");
+ SvPVCLEAR(linestr);
eof:
/* End of real input. Close filehandle (unless it was STDIN),
* then add implicit termination.
Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
{
STRLEN len, origlen;
- char *p = proto ? SvPV(proto, len) : NULL;
+ char *p;
bool bad_proto = FALSE;
bool in_brackets = FALSE;
bool after_slash = FALSE;
if (!proto)
return TRUE;
+ p = SvPV(proto, len);
origlen = len;
for (; len--; p++) {
if (!isSPACE(*p)) {
return;
while (SPACE_OR_TAB(*s))
s++;
- if (strnEQ(s, "line", 4))
+ if (strEQs(s, "line"))
s += 4;
else
return;
}
else {
t = s;
- while (!isSPACE(*t))
+ while (*t && !isSPACE(*t))
t++;
e = t;
}
CopLINE_set(PL_curcop, line_num);
}
-#define skipspace(s) skipspace_flags(s, 0)
-
-
STATIC void
S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
{
sv = *av_fetch(av, 0, 1);
SvUPGRADE(sv, SVt_PVMG);
}
- if (!SvPOK(sv)) sv_setpvs(sv,"");
+ if (!SvPOK(sv)) SvPVCLEAR(sv);
if (orig_sv)
sv_catsv(sv, orig_sv);
else
}
/*
- * S_skipspace
+ * skipspace
* Called to gobble the appropriate amount and type of whitespace.
* Skips comments as well.
+ * Returns the next character after the whitespace that is skipped.
+ *
+ * peekspace
+ * Same thing, but look ahead without incrementing line numbers or
+ * adjusting PL_linestart.
*/
+#define skipspace(s) skipspace_flags(s, 0)
+#define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
+
STATIC char *
S_skipspace_flags(pTHX_ char *s, U32 flags)
{
*/
STATIC I32
-S_lop(pTHX_ I32 f, int x, char *s)
+S_lop(pTHX_ I32 f, U8 x, char *s)
{
PERL_ARGS_ASSERT_LOP;
assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
PL_nexttype[PL_nexttoke] = type;
PL_nexttoke++;
- if (PL_lex_state != LEX_KNOWNEXT) {
- PL_lex_defer = PL_lex_state;
- PL_lex_state = LEX_KNOWNEXT;
- }
}
/*
S_postderef(pTHX_ int const funny, char const next)
{
assert(funny == DOLSHARP || strchr("$@%&*", funny));
- assert(strchr("*[{", next));
if (next == '*') {
PL_expect = XOPERATOR;
if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
assert('@' == funny || '$' == funny || DOLSHARP == funny);
PL_lex_state = LEX_INTERPEND;
- force_next(POSTJOIN);
+ if ('@' == funny)
+ force_next(POSTJOIN);
}
force_next(next);
PL_bufptr+=2;
S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
{
SV * const sv = newSVpvn_utf8(start, len,
- !IN_BYTES
- && UTF
- && !is_invariant_string((const U8*)start, len)
- && is_utf8_string((const U8*)start, len));
+ !IN_BYTES
+ && UTF
+ && !is_utf8_invariant_string((const U8*)start, len)
+ && is_utf8_string((const U8*)start, len));
return sv;
}
*
* Arguments:
* char *start : buffer position (must be within PL_linestr)
- * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
+ * int token : PL_next* will be this type of bare word
+ * (e.g., METHOD,BAREWORD)
* int check_keyword : if true, Perl checks to make sure the word isn't
* a keyword (do this if the word is a label, e.g. goto FOO)
* int allow_pack : if true, : characters will also be allowed (require,
start = skipspace(start);
s = start;
if (isIDFIRST_lazy_if(s,UTF)
- || (allow_pack && *s == ':') )
+ || (allow_pack && *s == ':' && s[1] == ':') )
{
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
if (check_keyword) {
char *s2 = PL_tokenbuf;
STRLEN len2 = len;
- if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
+ if (allow_pack && len > 6 && strEQs(s2, "CORE::"))
s2 += 6, len2 -= 6;
if (keyword(s2, len2, 0))
return start;
}
}
NEXTVAL_NEXTTOKE.opval
- = (OP*)newSVOP(OP_CONST,0,
+ = newSVOP(OP_CONST,0,
S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
force_next(token);
* Called when the lexer wants $foo *foo &foo etc, but the program
* text only contains the "foo" portion. The first argument is a pointer
* to the "foo", and the second argument is the type symbol to prefix.
- * Forces the next token to be a "WORD".
+ * Forces the next token to be a "BAREWORD".
* Creates the symbol if it didn't already exist (via gv_fetchpv()).
*/
if (s[0]) {
const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
- OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
+ OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
UTF ? SVf_UTF8 : 0));
NEXTVAL_NEXTTOKE.opval = o;
- force_next(WORD);
+ force_next(BAREWORD);
if (kind) {
o->op_private = OPpCONST_ENTERED;
/* XXX see note in pp_entereval() for why we forgo typo
/* NOTE: The parser sees the package name and the VERSION swapped */
NEXTVAL_NEXTTOKE.opval = version;
- force_next(WORD);
+ force_next(BAREWORD);
return s;
}
/* NOTE: The parser sees the package name and the VERSION swapped */
NEXTVAL_NEXTTOKE.opval = version;
- force_next(WORD);
+ force_next(BAREWORD);
return s;
}
SvREFCNT_dec(sv);
sv = nsv;
}
- pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
+ pl_yylval.opval = newSVOP(op_type, 0, sv);
return THING;
}
- PL_sublex_info.super_state = PL_lex_state;
- PL_sublex_info.sub_inwhat = (U16)op_type;
- PL_sublex_info.sub_op = PL_lex_op;
+ PL_parser->lex_super_state = PL_lex_state;
+ PL_parser->lex_sub_inwhat = (U16)op_type;
+ PL_parser->lex_sub_op = PL_lex_op;
PL_lex_state = LEX_INTERPPUSH;
PL_expect = XTERM;
const bool is_heredoc = PL_multi_close == '<';
ENTER;
- PL_lex_state = PL_sublex_info.super_state;
+ PL_lex_state = PL_parser->lex_super_state;
SAVEI8(PL_lex_dojoin);
SAVEI32(PL_lex_brackets);
SAVEI32(PL_lex_allbrackets);
SAVEI32(PL_lex_casemods);
SAVEI32(PL_lex_starts);
SAVEI8(PL_lex_state);
- SAVEI8(PL_lex_defer);
SAVESPTR(PL_lex_repl);
SAVEVPTR(PL_lex_inpat);
SAVEI16(PL_lex_inwhat);
SAVEI32(PL_parser->herelines);
PL_parser->herelines = 0;
}
- SAVEI8(PL_multi_close);
+ SAVEIV(PL_multi_close);
SAVEPPTR(PL_bufptr);
SAVEPPTR(PL_bufend);
SAVEPPTR(PL_oldbufptr);
PL_parser->lex_shared->ls_bufptr = PL_bufptr;
PL_linestr = PL_lex_stuff;
- PL_lex_repl = PL_sublex_info.repl;
+ PL_lex_repl = PL_parser->lex_sub_repl;
PL_lex_stuff = NULL;
- PL_sublex_info.repl = NULL;
+ PL_parser->lex_sub_repl = NULL;
/* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
set for an inner quote-like operator and then an error causes scope-
popping. We must not have a PL_lex_stuff value left dangling, as
that breaks assumptions elsewhere. See bug #123617. */
SAVEGENERICSV(PL_lex_stuff);
- SAVEGENERICSV(PL_sublex_info.repl);
+ SAVEGENERICSV(PL_parser->lex_sub_repl);
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
= SvPVX(PL_linestr);
shared->ls_prev = PL_parser->lex_shared;
PL_parser->lex_shared = shared;
- PL_lex_inwhat = PL_sublex_info.sub_inwhat;
+ PL_lex_inwhat = PL_parser->lex_sub_inwhat;
if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
- PL_lex_inpat = PL_sublex_info.sub_op;
+ PL_lex_inpat = PL_parser->lex_sub_op;
else
PL_lex_inpat = NULL;
if (SvUTF8(PL_linestr))
SvUTF8_on(sv);
PL_expect = XOPERATOR;
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
+ pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
return THING;
}
}
if (SvTYPE(PL_linestr) >= SVt_PVNV) {
CopLINE(PL_curcop) +=
- ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
+ ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
+ PL_parser->herelines;
PL_parser->herelines = 0;
}
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
- if (!SvCUR(res))
+ if (!SvCUR(res)) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
+ "Unknown charname '' is deprecated");
return res;
+ }
if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
e - backslash_ptr,
if (*s == ' ' && *(s-1) == ' ') {
goto multi_spaces;
}
- if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
- "NO-BREAK SPACE in a charnames "
- "alias definition is deprecated");
- }
s++;
}
}
}
s++;
} else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
- if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
+ if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
goto bad_charname;
}
s += 2;
s++;
}
else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
- if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
+ if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
{
goto bad_charname;
}
- if (*s == *NBSP_UTF8
- && *(s+1) == *(NBSP_UTF8+1)
- && ckWARN_d(WARN_DEPRECATED))
- {
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
- "NO-BREAK SPACE in a charnames "
- "alias definition is deprecated");
- }
s += 2;
}
else {
example when it is entirely composed
of hex constants */
SV *res; /* result from charnames */
+ STRLEN offset_to_max; /* The offset in the output to where the range
+ high-end character is temporarily placed */
/* Note on sizing: The scanned constant is placed into sv, which is
* initialized by newSV() assuming one byte of output for every byte of
UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
before set */
#ifdef EBCDIC
- UV literal_endpoint = 0;
- bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
+ int backslash_N = 0; /* ? was the character from \N{} */
+ int non_portable_endpoint = 0; /* ? In a range is an endpoint
+ platform-specific like \x65 */
#endif
PERL_ARGS_ASSERT_SCAN_CONST;
assert(PL_lex_inwhat != OP_TRANSR);
- if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
+ if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
/* If we are doing a trans and we know we want UTF8 set expectation */
- has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
- this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
+ has_utf8 = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
+ this_utf8 = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
}
/* Protect sv from errors and fatal warnings. */
ENTER_with_name("scan_const");
SAVEFREESV(sv);
- while (s < send || dorange) {
+ while (s < send
+ || dorange /* Handle tr/// range at right edge of input */
+ ) {
/* get transliterations out of the way (they're most literal) */
if (PL_lex_inwhat == OP_TRANS) {
- /* expand a range A-Z to the full set of characters. AIE! */
- if (dorange) {
- I32 i; /* current expanded character */
- I32 min; /* first character in range */
- I32 max; /* last character in range */
+ /* But there isn't any special handling necessary unless there is a
+ * range, so for most cases we just drop down and handle the value
+ * as any other. There are two exceptions.
+ *
+ * 1. A minus sign indicates that we are actually going to have
+ * a range. In this case, skip the '-', set a flag, then drop
+ * down to handle what should be the end range value.
+ * 2. After we've handled that value, the next time through, that
+ * flag is set and we fix up the range.
+ *
+ * Ranges entirely within Latin1 are expanded out entirely, in
+ * order to avoid the significant overhead of making a swash.
+ * Ranges that extend above Latin1 have to have a swash, so there
+ * is no advantage to abbreviating them here, so they are stored
+ * here as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte signifies
+ * a hyphen without any possible ambiguity. On EBCDIC machines, if
+ * the range is expressed as Unicode, the Latin1 portion is
+ * expanded out even if the entire range extends above Latin1.
+ * This is because each code point in it has to be processed here
+ * individually to get its native translation */
+
+ if (! dorange) {
+
+ /* Here, we don't think we're in a range. If we've processed
+ * at least one character, then see if this next one is a '-',
+ * indicating the previous one was the start of a range. But
+ * don't bother if we're too close to the end for the minus to
+ * mean that. */
+ if (*s != '-' || s >= send - 1 || s == start) {
+
+ /* A regular character. Process like any other, but first
+ * clear any flags */
+ didrange = FALSE;
+ dorange = FALSE;
#ifdef EBCDIC
- UV uvmax = 0;
+ non_portable_endpoint = 0;
+ backslash_N = 0;
#endif
+ /* Drops down to generic code to process current byte */
+ }
+ else {
+ if (didrange) { /* Something like y/A-C-Z// */
+ Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
+ }
- if (has_utf8
+ dorange = TRUE;
+
+ s++; /* Skip past the minus */
+
+ /* d now points to where the end-range character will be
+ * placed. Save it so won't have to go finding it later,
+ * and drop down to get that character. (Actually we
+ * instead save the offset, to handle the case where a
+ * realloc in the meantime could change the actual
+ * pointer). We'll finish processing the range the next
+ * time through the loop */
+ offset_to_max = d - SvPVX_const(sv);
+ }
+ } /* End of not a range */
+ else {
+ /* Here we have parsed a range. Now must handle it. At this
+ * point:
+ * 'sv' is a SV* that contains the output string we are
+ * constructing. The final two characters in that string
+ * are the range start and range end, in order.
+ * 'd' points to just beyond the range end in the 'sv' string,
+ * where we would next place something
+ * 'offset_to_max' is the offset in 'sv' at which the character
+ * before 'd' begins.
+ */
+ const char * max_ptr = SvPVX_const(sv) + offset_to_max;
+ const char * min_ptr;
+ IV range_min;
+ IV range_max; /* last character in range */
+ STRLEN save_offset;
+ STRLEN grow;
#ifdef EBCDIC
- && !native_range
+ bool convert_unicode;
+ IV real_range_max = 0;
#endif
- ) {
- char * const c = (char*)utf8_hop((U8*)d, -1);
- char *e = d++;
- while (e-- > c)
- *(e + 1) = *e;
- *c = (char) ILLEGAL_UTF8_BYTE;
- /* mark the range as done, and continue */
- dorange = FALSE;
- didrange = TRUE;
- continue;
- }
- i = d - SvPVX_const(sv); /* remember current offset */
+ /* Get the range-ends code point values. */
+ if (has_utf8) {
+ /* We know the utf8 is valid, because we just constructed
+ * it ourselves in previous loop iterations */
+ min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
+ range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
+ range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
+ }
+ else {
+ min_ptr = max_ptr - 1;
+ range_min = * (U8*) min_ptr;
+ range_max = * (U8*) max_ptr;
+ }
+
#ifdef EBCDIC
- SvGROW(sv,
- SvLEN(sv) + ((has_utf8)
- ? (512 - UTF_CONTINUATION_MARK
- + UNISKIP(0x100))
- : 256));
- /* How many two-byte within 0..255: 128 in UTF-8,
- * 96 in UTF-8-mod. */
-#else
- SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
+ /* On EBCDIC platforms, we may have to deal with portable
+ * ranges. These happen if at least one range endpoint is a
+ * Unicode value (\N{...}), or if the range is a subset of
+ * [A-Z] or [a-z], and both ends are literal characters,
+ * like 'A', and not like \x{C1} */
+ if ((convert_unicode
+ = cBOOL(backslash_N) /* \N{} forces Unicode, hence
+ portable range */
+ || ( ! non_portable_endpoint
+ && (( isLOWER_A(range_min) && isLOWER_A(range_max))
+ || (isUPPER_A(range_min) && isUPPER_A(range_max))))
+ )) {
+
+ /* Special handling is needed for these portable ranges.
+ * They are defined to all be in Unicode terms, which
+ * include all Unicode code points between the end points.
+ * Convert to Unicode to get the Unicode range. Later we
+ * will convert each code point in the range back to
+ * native. */
+ range_min = NATIVE_TO_UNI(range_min);
+ range_max = NATIVE_TO_UNI(range_max);
+ }
#endif
- d = SvPVX(sv) + i; /* refresh d after realloc */
+
+ if (range_min > range_max) {
#ifdef EBCDIC
- if (has_utf8) {
- int j;
- for (j = 0; j <= 1; j++) {
- char * const c = (char*)utf8_hop((U8*)d, -1);
- const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
- if (j)
- min = (U8)uv;
- else if (uv < 256)
- max = (U8)uv;
- else {
- max = (U8)0xff; /* only to \xff */
- uvmax = uv; /* \x{100} to uvmax */
- }
- d = c; /* eat endpoint chars */
- }
+ if (convert_unicode) {
+ /* Need to convert back to native for meaningful
+ * messages for this platform */
+ range_min = UNI_TO_NATIVE(range_min);
+ range_max = UNI_TO_NATIVE(range_max);
+ }
+#endif
+
+ /* Use the characters themselves for the error message if
+ * ASCII printables; otherwise some visible representation
+ * of them */
+ if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
+ Perl_croak(aTHX_
+ "Invalid range \"%c-%c\" in transliteration operator",
+ (char)range_min, (char)range_max);
+ }
+#ifdef EBCDIC
+ else if (convert_unicode) {
+ /* diag_listed_as: Invalid range "%s" in transliteration operator */
+ Perl_croak(aTHX_
+ "Invalid range \"\\N{U+%04"UVXf"}-\\N{U+%04"UVXf"}\""
+ " in transliteration operator",
+ range_min, range_max);
+ }
+#endif
+ else {
+ /* diag_listed_as: Invalid range "%s" in transliteration operator */
+ Perl_croak(aTHX_
+ "Invalid range \"\\x{%04"UVXf"}-\\x{%04"UVXf"}\""
+ " in transliteration operator",
+ range_min, range_max);
+ }
}
- else {
+
+ if (has_utf8) {
+
+ /* We try to avoid creating a swash. If the upper end of
+ * this range is below 256, this range won't force a swash;
+ * otherwise it does force a swash, and as long as we have
+ * to have one, we might as well not expand things out.
+ * But if it's EBCDIC, we may have to look at each
+ * character below 256 if we have to convert to/from
+ * Unicode values */
+ if (range_max > 255
+#ifdef EBCDIC
+ && (range_min > 255 || ! convert_unicode)
#endif
- d -= 2; /* eat the first char and the - */
- min = (U8)*d; /* first char in range */
- max = (U8)d[1]; /* last char in range */
+ ) {
+ /* Move the high character one byte to the right; then
+ * insert between it and the range begin, an illegal
+ * byte which serves to indicate this is a range (using
+ * a '-' could be ambiguous). */
+ char *e = d++;
+ while (e-- > max_ptr) {
+ *(e + 1) = *e;
+ }
+ *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
+ goto range_done;
+ }
+
+ /* Here, we're going to expand out the range. For EBCDIC
+ * the range can extend above 255 (not so in ASCII), so
+ * for EBCDIC, split it into the parts above and below
+ * 255/256 */
#ifdef EBCDIC
- }
+ if (range_max > 255) {
+ real_range_max = range_max;
+ range_max = 255;
+ }
#endif
+ }
+
+ /* Here we need to expand out the string to contain each
+ * character in the range. Grow the output to handle this */
+
+ save_offset = min_ptr - SvPVX_const(sv);
- if (min > max) {
- Perl_croak(aTHX_
- "Invalid range \"%c-%c\" in transliteration operator",
- (char)min, (char)max);
+ /* The base growth is the number of code points in the range */
+ grow = range_max - range_min + 1;
+ if (has_utf8) {
+
+ /* But if the output is UTF-8, some of those characters may
+ * need two bytes (since the maximum range value here is
+ * 255, the max bytes per character is two). On ASCII
+ * platforms, it's not much trouble to get an accurate
+ * count of what's needed. But on EBCDIC, the ones that
+ * need 2 bytes are scattered around, so just use a worst
+ * case value instead of calculating for that platform. */
+#ifdef EBCDIC
+ grow *= 2;
+#else
+ /* Only those above 127 require 2 bytes. This may be
+ * everything in the range, or not */
+ if (range_min > 127) {
+ grow *= 2;
+ }
+ else if (range_max > 127) {
+ grow += range_max - 127;
+ }
+#endif
}
+ /* Subtract 3 for the bytes that were already accounted for
+ * (min, max, and the hyphen) */
+ d = save_offset + SvGROW(sv, SvLEN(sv) + grow - 3);
+
#ifdef EBCDIC
- /* Because of the discontinuities in EBCDIC A-Z and a-z, expand
- * any subsets of these ranges into individual characters */
- if (literal_endpoint == 2
- && ((isLOWER_A(min) && isLOWER_A(max))
- || (isUPPER_A(min) && isUPPER_A(max))))
- {
- for (i = min; i <= max; i++) {
- if (isALPHA_A(i))
- *d++ = i;
+ /* Here, we expand out the range. */
+ if (convert_unicode) {
+ IV i;
+
+ /* Recall that the min and max are now in Unicode terms, so
+ * we have to convert each character to its native
+ * equivalent */
+ if (has_utf8) {
+ for (i = range_min; i <= range_max; i++) {
+ append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
+ (U8 **) &d);
+ }
+ }
+ else {
+ for (i = range_min; i <= range_max; i++) {
+ *d++ = (char)LATIN1_TO_NATIVE((U8) i);
+ }
}
}
- else
+ else
#endif
- for (i = min; i <= max; i++)
-#ifdef EBCDIC
- if (has_utf8) {
- append_utf8_from_native_byte(i, &d);
+ /* Always gets run for ASCII, and sometimes for EBCDIC. */
+ {
+ IV i;
+
+ /* Here, no conversions are necessary, which means that the
+ * first character in the range is already in 'd' and
+ * valid, so we can skip overwriting it */
+ if (has_utf8) {
+ d += UTF8SKIP(d);
+ for (i = range_min + 1; i <= range_max; i++) {
+ append_utf8_from_native_byte((U8) i, (U8 **) &d);
}
- else
-#endif
+ }
+ else {
+ d++;
+ for (i = range_min + 1; i <= range_max; i++) {
*d++ = (char)i;
-
+ }
+ }
+ }
+
#ifdef EBCDIC
- if (uvmax) {
- d = (char*)uvchr_to_utf8((U8*)d, 0x100);
- if (uvmax > 0x101)
+ /* If the original range extended above 255, add in that portion. */
+ if (real_range_max) {
+ *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
+ *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
+ if (real_range_max > 0x101)
*d++ = (char) ILLEGAL_UTF8_BYTE;
- if (uvmax > 0x100)
- d = (char*)uvchr_to_utf8((U8*)d, uvmax);
+ if (real_range_max > 0x100)
+ d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
}
#endif
+ range_done:
/* mark the range as done, and continue */
- dorange = FALSE;
didrange = TRUE;
+ dorange = FALSE;
#ifdef EBCDIC
- literal_endpoint = 0;
+ non_portable_endpoint = 0;
+ backslash_N = 0;
#endif
continue;
- }
-
- /* range begins (ignore - as first or last char) */
- else if (*s == '-' && s+1 < send && s != start) {
- if (didrange) {
- Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
- }
- if (has_utf8
-#ifdef EBCDIC
- && !native_range
-#endif
- ) {
- *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
- s++;
- continue;
- }
- dorange = TRUE;
- s++;
- }
- else {
- didrange = FALSE;
-#ifdef EBCDIC
- literal_endpoint = 0;
- native_range = TRUE;
-#endif
- }
- }
-
- /* if we get to any of these else's, we're not doing a
- * transliteration. */
-
+ } /* End of is a range */
+ } /* End of transliteration. Joins main code after these else's */
else if (*s == '[' && PL_lex_inpat && !in_charclass) {
char *s1 = s-1;
int esc = 0;
}
switch (*s) {
-
- /* quoted - in transliterations */
- case '-':
- if (PL_lex_inwhat == OP_TRANS) {
- *d++ = *s++;
- continue;
- }
- /* FALLTHROUGH */
default:
{
if ((isALPHANUMERIC(*s)))
}
NUM_ESCAPE_INSERT:
- /* Insert oct or hex escaped character. There will always be
- * enough room in sv since such escapes will be longer than any
- * UTF-8 sequence they can end up as, except if they force us
- * to recode the rest of the string into utf8 */
+ /* Insert oct or hex escaped character. */
/* Here uv is the ordinal of the next character being added */
- if (!UVCHR_IS_INVARIANT(uv)) {
+ if (UVCHR_IS_INVARIANT(uv)) {
+ *d++ = (char) uv;
+ }
+ else {
if (!has_utf8 && uv > 255) {
/* Might need to recode whatever we have accumulated so
* far if it contains any chars variant in utf8 or
}
if (has_utf8) {
+ /* Usually, there will already be enough room in 'sv'
+ * since such escapes are likely longer than any UTF-8
+ * sequence they can end up as. This isn't the case on
+ * EBCDIC where \x{40000000} contains 12 bytes, and the
+ * UTF-8 for it contains 14. And, we have to allow for
+ * a trailing NUL. It probably can't happen on ASCII
+ * platforms, but be safe */
+ const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
+ + 1;
+ if (UNLIKELY(needed > SvLEN(sv))) {
+ SvCUR_set(sv, d - SvPVX_const(sv));
+ d = sv_grow(sv, needed) + SvCUR(sv);
+ }
+
d = (char*)uvchr_to_utf8((U8*)d, uv);
if (PL_lex_inwhat == OP_TRANS
- && PL_sublex_info.sub_op)
+ && PL_parser->lex_sub_op)
{
- PL_sublex_info.sub_op->op_private |=
+ PL_parser->lex_sub_op->op_private |=
(PL_lex_repl ? OPpTRANS_FROM_UTF
: OPpTRANS_TO_UTF);
}
-#ifdef EBCDIC
- if (uv > 255 && !dorange)
- native_range = FALSE;
-#endif
}
else {
*d++ = (char)uv;
}
}
- else {
- *d++ = (char) uv;
- }
+#ifdef EBCDIC
+ non_portable_endpoint++;
+#endif
continue;
case 'N':
/* In a non-pattern \N must be like \N{U+0041}, or it can be a
* named character, like \N{LATIN SMALL LETTER A}, or a named
* sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
- * GRAVE}. For convenience all three forms are referred to as
- * "named characters" below.
+ * GRAVE} (except y/// can't handle the latter, croaking). For
+ * convenience all three forms are referred to as "named
+ * characters" below.
*
* For patterns, \N also can mean to match a non-newline. Code
* before this 'switch' statement should already have handled
*
* The structure of this section of code (besides checking for
* errors and upgrading to utf8) is:
- * If the named character is of the form \N{U+...}, pass it
+ * If the named character is of the form \N{U+...}, pass it
* through if a pattern; otherwise convert the code point
* to utf8
- * Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
- * if a pattern; otherwise convert to utf8
+ * Otherwise must be some \N{NAME}: convert to
+ * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
+ *
+ * Transliteration is an exception. The conversion to utf8 is
+ * only done if the code point requires it to be representable.
*
* Here, 's' points to the 'N'; the test below is guaranteed to
* succeed if we are being called on a pattern, as we already
if (len == 0 || (len != (STRLEN)(e - s)))
goto bad_NU;
- /* If the destination is not in utf8, unconditionally
- * recode it to be so. This is because \N{} implies
- * Unicode semantics, and scalars have to be in utf8
- * to guarantee those semantics */
- if (! has_utf8) {
+ /* For non-tr///, if the destination is not in utf8,
+ * unconditionally recode it to be so. This is
+ * because \N{} implies Unicode semantics, and scalars
+ * have to be in utf8 to guarantee those semantics.
+ * tr/// doesn't care about Unicode rules, so no need
+ * there to upgrade to UTF-8 for small enough code
+ * points */
+ if (! has_utf8 && ( uv > 0xFF
+ || PL_lex_inwhat != OP_TRANS))
+ {
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
sv_utf8_upgrade_flags_grow(
sv,
SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- UVCHR_SKIP(uv) + (STRLEN)(send - e) + 1);
+ OFFUNISKIP(uv) + (STRLEN)(send - e) + 1);
d = SvPVX(sv) + SvCUR(sv);
has_utf8 = TRUE;
}
/* Add the (Unicode) code point to the output. */
- if (UNI_IS_INVARIANT(uv)) {
+ if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
*d++ = (char) LATIN1_TO_NATIVE(uv);
}
else {
/* The regex compiler is
* expecting Unicode, not
* native */
- (U8) NATIVE_TO_LATIN1(*str));
+ NATIVE_TO_LATIN1(*str));
PERL_MY_SNPRINTF_POST_GUARD(len,
sizeof(hex_string));
Copy(hex_string, d, 3, char);
else { /* Here, not in a pattern. Convert the name to a
* string. */
- /* If destination is not in utf8, unconditionally
- * recode it to be so. This is because \N{} implies
- * Unicode semantics, and scalars have to be in utf8
- * to guarantee those semantics */
- if (! has_utf8) {
+ if (PL_lex_inwhat == OP_TRANS) {
+ str = SvPV_const(res, len);
+ if (len > ((SvUTF8(res))
+ ? UTF8SKIP(str)
+ : 1U))
+ {
+ yyerror(Perl_form(aTHX_
+ "%.*s must not be a named sequence"
+ " in transliteration operator",
+ /* +1 to include the "}" */
+ (int) (e + 1 - start), start));
+ goto end_backslash_N;
+ }
+ }
+ else if (! SvUTF8(res)) {
+ /* Make sure \N{} return is UTF-8. This is because
+ * \N{} implies Unicode semantics, and scalars have
+ * to be in utf8 to guarantee those semantics; but
+ * not needed in tr/// */
+ sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
+ str = SvPV_const(res, len);
+ }
+
+ /* Upgrade destination to be utf8 if this new
+ * component is */
+ if (! has_utf8 && SvUTF8(res)) {
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
const STRLEN off = d - SvPVX_const(sv);
d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
}
- if (! SvUTF8(res)) { /* Make sure \N{} return is UTF-8 */
- sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
- str = SvPV_const(res, len);
- }
Copy(str, d, len, char);
d += len;
}
SvREFCNT_dec(res);
} /* End \N{NAME} */
+
+ end_backslash_N:
#ifdef EBCDIC
- if (!dorange)
- native_range = FALSE; /* \N{} is defined to be Unicode */
+ backslash_N++; /* \N{} is defined to be Unicode */
#endif
s = e + 1; /* Point to just after the '}' */
continue;
else {
yyerror("Missing control char name in \\c");
}
+#ifdef EBCDIC
+ non_portable_endpoint++;
+#endif
continue;
/* printf-style backslashes, formfeeds, newlines, etc */
s++;
continue;
} /* end if (backslash) */
-#ifdef EBCDIC
- else
- literal_endpoint++;
-#endif
default_action:
/* If we started with encoded form, or already know we want it,
if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
STRLEN len = 1;
-
/* One might think that it is wasted effort in the case of the
* source being utf8 (this_utf8 == TRUE) to take the next character
* in the source, convert it to an unsigned value, and then convert
s += len;
d = (char*)uvchr_to_utf8((U8*)d, nextuv);
-#ifdef EBCDIC
- if (uv > 255 && !dorange)
- native_range = FALSE;
-#endif
}
else {
*d++ = *s++;
" >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
SvPOK_on(sv);
- if (IN_ENCODING && !has_utf8) {
- sv_recode_to_utf8(sv, _get_encoding());
- if (SvUTF8(sv))
- has_utf8 = TRUE;
- }
if (has_utf8) {
SvUTF8_on(sv);
- if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
- PL_sublex_info.sub_op->op_private |=
+ if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
+ PL_parser->lex_sub_op->op_private |=
(PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
}
}
sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
type, typelen);
}
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
+ pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
}
LEAVE_with_name("scan_const");
return s;
tmpbuf[len] = '\0';
goto bare_package;
}
- indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
- if (indirgv && GvCVu(indirgv))
+ indirgv = gv_fetchpvn_flags(tmpbuf, len,
+ GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
+ SVt_PVCV);
+ if (indirgv && SvTYPE(indirgv) != SVt_NULL
+ && (!isGV(indirgv) || GvCVu(indirgv)))
return 0;
/* filehandle or package name makes it a method */
if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
return 0; /* no assumptions -- "=>" quotes bareword */
bare_package:
- NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
+ NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
PL_expect = XTERM;
- force_next(WORD);
+ force_next(BAREWORD);
PL_bufptr = s;
return *s == '(' ? FUNCMETH : METHOD;
}
if (*s == ';' || *s == '}'
|| (s = skipspace(s), (*s == ';' || *s == '}'))) {
NEXTVAL_NEXTTOKE.opval = NULL;
- force_next(WORD);
+ force_next(BAREWORD);
}
else if (*s == 'v') {
- s = force_word(s,WORD,FALSE,TRUE);
+ s = force_word(s,BAREWORD,FALSE,TRUE);
s = force_version(s, FALSE);
}
}
else {
- s = force_word(s,WORD,FALSE,TRUE);
+ s = force_word(s,BAREWORD,FALSE,TRUE);
s = force_version(s, FALSE);
}
pl_yylval.ival = is_use;
static const char* const exp_name[] =
{ "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
"ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
- "TERMORDORDOR"
+ "SIGVAR", "TERMORDORDOR"
};
#endif
-#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
+#define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
STATIC bool
-S_word_takes_any_delimeter(char *p, STRLEN len)
+S_word_takes_any_delimiter(char *p, STRLEN len)
{
return (len == 1 && strchr("msyq", p[0]))
|| (len == 2
pl_yylval.ival = OPpSLICEWARNING;
}
+#define lex_token_boundary() S_lex_token_boundary(aTHX)
+static void
+S_lex_token_boundary(pTHX)
+{
+ PL_oldoldbufptr = PL_oldbufptr;
+ PL_oldbufptr = PL_bufptr;
+}
+
+#define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
+static char *
+S_vcs_conflict_marker(pTHX_ char *s)
+{
+ lex_token_boundary();
+ PL_bufptr = s;
+ yyerror("Version control conflict marker");
+ while (s < PL_bufend && *s != '\n')
+ s++;
+ return s;
+}
+
/*
yylex
The type of the next token
Structure:
+ Check if we have already built the token; if so, use it.
Switch based on the current state:
- - if we already built the token before, use it
- if we have a case modifier in a string, deal with that
- handle other cases of interpolation inside a string
- scan the next line if we are inside a format
- In the normal state switch on the next character:
+ In the normal state, switch on the next character:
- default:
if alphabetic, go to key lookup
- unrecoginized character - croak
+ unrecognized character - croak
- 0/4/26: handle end-of-line or EOF
- cases for whitespace
- \n and #: handle comments and line numbers
if (PL_nexttoke) {
PL_nexttoke--;
pl_yylval = PL_nextval[PL_nexttoke];
- if (!PL_nexttoke) {
- PL_lex_state = PL_lex_defer;
- PL_lex_defer = LEX_NORMAL;
- }
{
I32 next_type;
next_type = PL_nexttype[PL_nexttoke];
/* FALLTHROUGH */
case LEX_INTERPEND:
- /* Treat state as LEX_NORMAL if we have no inner lexing scope.
- XXX This hack can be removed if we stop setting PL_lex_state to
- LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below. */
- if (UNLIKELY(!PL_lex_inwhat)) {
- PL_lex_state = LEX_NORMAL;
- break;
- }
-
if (PL_lex_dojoin) {
const U8 dojoin_was = PL_lex_dojoin;
PL_lex_dojoin = FALSE;
PL_lex_state = LEX_INTERPCONCAT;
PL_lex_allbrackets--;
- return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
+ return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
}
if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
&& SvEVALED(PL_lex_repl))
else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
PL_bufptr - PL_parser->lex_shared->re_eval_start);
NEXTVAL_NEXTTOKE.opval =
- (OP*)newSVOP(OP_CONST, 0,
+ newSVOP(OP_CONST, 0,
sv);
force_next(THING);
PL_parser->lex_shared->re_eval_start = NULL;
Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
(long) PL_lex_brackets);
#endif
- /* Treat state as LEX_NORMAL when not in an inner lexing scope.
- XXX This hack can be removed if we stop setting PL_lex_state to
- LEX_KNOWNEXT. */
- if (UNLIKELY(!PL_lex_inwhat)) {
- PL_lex_state = LEX_NORMAL;
- break;
- }
-
if (PL_bufptr == PL_bufend)
return REPORT(sublex_done());
if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
SV *sv = newSVsv(PL_linestr);
sv = tokeq(sv);
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
+ pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
s = PL_bufend;
}
else {
PL_oldbufptr = s;
PL_parser->saw_infix_sigil = 0;
+ if (PL_in_my == KEY_sigvar) {
+ /* we expect the sigil and optional var name part of a
+ * signature element here. Since a '$' is not necessarily
+ * followed by a var name, handle it specially here; the general
+ * yylex code would otherwise try to interpret whatever follows
+ * as a var; e.g. ($, ...) would be seen as the var '$,'
+ */
+
+ char sigil;
+
+ s = skipspace(s);
+ sigil = *s++;
+ PL_bufptr = s; /* for error reporting */
+ switch (sigil) {
+ case '$':
+ case '@':
+ case '%':
+ /* spot stuff that looks like an prototype */
+ if (strchr("$:@%&*;\\[]", *s)) {
+ yyerror("Illegal character following sigil in a subroutine signature");
+ break;
+ }
+ /* '$#' is banned, while '$ # comment' isn't */
+ if (*s == '#') {
+ yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
+ break;
+ }
+ s = skipspace(s);
+ if (isIDFIRST_lazy_if(s, UTF)) {
+ char *dest = PL_tokenbuf + 1;
+ /* read var name, including sigil, into PL_tokenbuf */
+ PL_tokenbuf[0] = sigil;
+ parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
+ 0, cBOOL(UTF), FALSE);
+ *dest = '\0';
+ assert(PL_tokenbuf[1]); /* we have a variable name */
+ NEXTVAL_NEXTTOKE.ival = sigil;
+ force_next('p'); /* force a signature pending identifier */
+ }
+ else
+ PL_in_my = 0;
+ PL_expect = XOPERATOR;
+ break;
+
+ case ')':
+ PL_expect = XBLOCK;
+ break;
+ case ',': /* handle ($a,,$b) */
+ break;
+
+ default:
+ PL_in_my = 0;
+ yyerror("A signature parameter must start with '$', '@' or '%'");
+ /* very crude error recovery: skip to likely next signature
+ * element */
+ while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
+ s++;
+ break;
+ }
+ TOKEN(sigil);
+ }
+
retry:
switch (*s) {
default:
- if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
+ if (UTF) {
+ if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
+ ENTER;
+ SAVESPTR(PL_warnhook);
+ PL_warnhook = PERL_WARNHOOK_FATAL;
+ utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
+ LEAVE;
+ }
+ if (isIDFIRST_utf8((U8*)s)) {
+ goto keylookup;
+ }
+ }
+ else if (isALNUMC(*s)) {
goto keylookup;
- {
+ }
+ {
SV *dsv = newSVpvs_flags("", SVs_TEMP);
- const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
- UTF8SKIP(s),
- SVs_TEMP | SVf_UTF8),
- 10, UNI_DISPLAY_ISPRINT)
- : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
+ const char *c;
+ if (UTF) {
+ STRLEN skiplen = UTF8SKIP(s);
+ STRLEN stravail = PL_bufend - s;
+ c = sv_uni_display(dsv, newSVpvn_flags(s,
+ skiplen > stravail ? stravail : skiplen,
+ SVs_TEMP | SVf_UTF8),
+ 10, UNI_DISPLAY_ISPRINT);
+ }
+ else {
+ c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
+ }
len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
if (len > UNRECOGNIZED_PRECEDE_COUNT) {
- d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
+ d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)PL_linestart) : s - UNRECOGNIZED_PRECEDE_COUNT;
} else {
d = PL_linestart;
}
}
PL_parser->preambling = CopLINE(PL_curcop);
} else
- sv_setpvs(PL_linestr,"");
+ SvPVCLEAR(PL_linestr);
if (PL_preambleav) {
SV **svp = AvARRAY(PL_preambleav);
SV **const end = svp + AvFILLp(PL_preambleav);
}
if (PL_parser->in_pod) {
/* Incest with pod. */
- if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
- sv_setpvs(PL_linestr, "");
+ if (*s == '=' && strEQs(s, "=cut") && !isALPHA(s[4])) {
+ SvPVCLEAR(PL_linestr);
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
/* if we have already added "LINE: while (<>) {",
we must not do it again */
{
- sv_setpvs(PL_linestr, "");
+ SvPVCLEAR(PL_linestr);
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
while (s < PL_bufend && SPACE_OR_TAB(*s))
s++;
- if (strnEQ(s,"=>",2)) {
- s = force_word(PL_bufptr,WORD,FALSE,FALSE);
+ if (strEQs(s,"=>")) {
+ s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
OPERATOR('-'); /* unary minus */
}
sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
if (*d == '(') {
d = scan_str(d,TRUE,TRUE,FALSE,NULL);
- COPLINE_SET_FROM_MULTI_END;
if (!d) {
- /* MUST advance bufptr here to avoid bogus
- "at end of line" context messages from yyerror().
- */
- PL_bufptr = s + len;
- yyerror("Unterminated attribute parameter in attribute list");
if (attrs)
op_free(attrs);
sv_free(sv);
- return REPORT(0); /* EOF indicator */
+ Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
}
+ COPLINE_SET_FROM_MULTI_END;
}
if (PL_lex_stuff) {
sv_catsv(sv, PL_lex_stuff);
d++;
if (*d == '}') {
const char minus = (PL_tokenbuf[0] == '-');
- s = force_word(s + minus, WORD, FALSE, TRUE);
+ s = force_word(s + minus, BAREWORD, FALSE, TRUE);
if (minus)
force_next('-');
}
else
/* skip plain q word */
while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
- t += UTF8SKIP(t);
+ t += UTF ? UTF8SKIP(t) : 1;
}
else if (isWORDCHAR_lazy_if(t,UTF)) {
- t += UTF8SKIP(t);
+ t += UTF ? UTF8SKIP(t) : 1;
while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
- t += UTF8SKIP(t);
+ t += UTF ? UTF8SKIP(t) : 1;
}
while (t < PL_bufend && isSPACE(*t))
t++;
PL_expect = XTERM;
break;
}
- if (strnEQ(s, "sub", 3)) {
+ if (strEQs(s, "sub")) {
d = s + 3;
d = skipspace(d);
if (*d == ':') {
{
const char tmp = *s++;
if (tmp == '=') {
+ if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "=====")) {
+ s = vcs_conflict_marker(s + 5);
+ goto retry;
+ }
if (!PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
{
while (s < d) {
if (*s++ == '\n') {
incline(s);
- if (strnEQ(s,"=cut",4)) {
+ if (strEQs(s,"=cut")) {
s = strchr(s,'\n');
if (s)
s++;
if (PL_expect != XOPERATOR) {
if (s[1] != '<' && !strchr(s,'>'))
check_uni();
- if (s[1] == '<' && s[2] != '>')
+ if (s[1] == '<' && s[2] != '>') {
+ if ((s == PL_linestart || s[-1] == '\n') && strEQs(s+2, "<<<<<")) {
+ s = vcs_conflict_marker(s + 7);
+ goto retry;
+ }
s = scan_heredoc(s);
+ }
else
s = scan_inputsymbol(s);
PL_expect = XOPERATOR;
{
char tmp = *s++;
if (tmp == '<') {
+ if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "<<<<<")) {
+ s = vcs_conflict_marker(s + 5);
+ goto retry;
+ }
if (*s == '=' && !PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
{
{
const char tmp = *s++;
if (tmp == '>') {
+ if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, ">>>>>")) {
+ s = vcs_conflict_marker(s + 5);
+ goto retry;
+ }
if (*s == '=' && !PL_lex_allbrackets
&& PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
{
TOKEN('$');
case '@':
- if (PL_expect == XOPERATOR)
- no_op("Array", s);
- else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
+ if (PL_expect == XPOSTDEREF)
+ POSTDEREF('@');
PL_tokenbuf[0] = '@';
s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
+ if (PL_expect == XOPERATOR) {
+ d = s;
+ if (PL_bufptr > s) {
+ d = PL_bufptr-1;
+ PL_bufptr = PL_oldbufptr;
+ }
+ no_op("Array", d);
+ }
pl_yylval.ival = 0;
if (!PL_tokenbuf[1]) {
PREREF('@');
TERM(THING);
case '\'':
+ if ( PL_expect == XOPERATOR
+ && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
+ return deprecate_commaless_var_list();
+
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
if (!s)
missingterm(NULL);
COPLINE_SET_FROM_MULTI_END;
DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
if (PL_expect == XOPERATOR) {
- if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
- return deprecate_commaless_var_list();
- }
- else
- no_op("String",s);
+ no_op("String",s);
}
pl_yylval.ival = OP_CONST;
TERM(sublex_start());
case '"':
+ if ( PL_expect == XOPERATOR
+ && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
+ return deprecate_commaless_var_list();
+
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
DEBUG_T( {
if (s)
"### Saw unterminated string\n");
} );
if (PL_expect == XOPERATOR) {
- if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
- return deprecate_commaless_var_list();
- }
- else
no_op("String",s);
}
if (!s)
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
/* Some keywords can be followed by any delimiter, including ':' */
- anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
+ anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
/* x::* is just a word, unless x is "CORE" */
if (!anydelim && *s == ':' && s[1] == ':') {
fat_arrow:
CLINE;
pl_yylval.opval
- = (OP*)newSVOP(OP_CONST, 0,
+ = newSVOP(OP_CONST, 0,
S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
pl_yylval.opval->op_private = OPpCONST_BARE;
- TERM(WORD);
+ TERM(BAREWORD);
}
/* Check for plugged-in keyword */
bool arrow;
STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
STRLEN soff = s - SvPVX(PL_linestr);
- s = skipspace_flags(s, LEX_NO_INCLINE);
+ s = peekspace(s);
arrow = *s == '=' && s[1] == '>';
PL_bufptr = SvPVX(PL_linestr) + bufoff;
s = SvPVX(PL_linestr) + soff;
reserved_word:
switch (tmp) {
- default: /* not a keyword */
/* Trade off - by using this evil construction we can pull the
variable gv into the block labelled keylookup. If not, then
we have to give it function scope so that the goto from the
earlier ':' case doesn't bypass the initialisation. */
- if (0) {
just_a_word_zero_gv:
sv = NULL;
cv = NULL;
orig_keyword = 0;
lex = 0;
off = 0;
- }
+ default: /* not a keyword */
just_a_word: {
int pkgname = 0;
const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
/* Presume this is going to be a bareword of some sort. */
CLINE;
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
+ pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
pl_yylval.opval->op_private = OPpCONST_BARE;
/* And if "Foo::", then that's what it certainly is. */
SvUTF8_on(sv);
SvREADONLY_on(sv);
}
- TERM(WORD);
+ TERM(BAREWORD);
}
/* If followed by a paren, it's certainly a subroutine. */
off ? rv2cv_op : pl_yylval.opval;
if (off)
op_free(pl_yylval.opval), force_next(PRIVATEREF);
- else op_free(rv2cv_op), force_next(WORD);
+ else op_free(rv2cv_op), force_next(BAREWORD);
pl_yylval.ival = 0;
TOKEN('&');
}
pl_yylval.opval->op_folded = 1;
pl_yylval.opval->op_flags |= OPf_SPECIAL;
}
- TOKEN(WORD);
+ TOKEN(BAREWORD);
}
op_free(pl_yylval.opval);
pl_yylval.opval =
- off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
+ off ? newCVREF(0, rv2cv_op) : rv2cv_op;
pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = OP_ENTERSUB;
}
NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
PL_expect = XTERM;
- force_next(off ? PRIVATEREF : WORD);
+ force_next(off ? PRIVATEREF : BAREWORD);
if (!PL_lex_allbrackets
&& PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
{
"Ambiguous use of %c resolved as operator %c",
lastchar, lastchar);
}
- TOKEN(WORD);
+ TOKEN(BAREWORD);
}
case KEY___FILE__:
FUN0OP(
- (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
+ newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
);
case KEY___LINE__:
FUN0OP(
- (OP*)newSVOP(OP_CONST, 0,
+ newSVOP(OP_CONST, 0,
Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
);
case KEY___PACKAGE__:
FUN0OP(
- (OP*)newSVOP(OP_CONST, 0,
+ newSVOP(OP_CONST, 0,
(PL_curstash
? newSVhek(HvNAME_HEK(PL_curstash))
: &PL_sv_undef))
if (!IN_BYTES) {
if (UTF)
PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
- else if (IN_ENCODING) {
- SV *name;
- dSP;
- ENTER;
- SAVETMPS;
- PUSHMARK(sp);
- XPUSHs(_get_encoding());
- PUTBACK;
- call_method("name", G_SCALAR);
- SPAGAIN;
- name = POPs;
- PUTBACK;
- PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
- Perl_form(aTHX_ ":encoding(%"SVf")",
- SVfARG(name)));
- FREETMPS;
- LEAVE;
- }
}
#endif
PL_rsfp = NULL;
1, &len);
if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
&& !keyword(PL_tokenbuf + 1, len, 0)) {
+ SSize_t off = s-SvPVX(PL_linestr);
d = skipspace(d);
+ s = SvPVX(PL_linestr)+off;
if (*d == '(') {
force_ident_maybe_lex('&');
s = d;
char *p = s;
if ((PL_bufend - p) >= 3
- && strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
+ && strEQs(p, "my") && isSPACE(*(p + 2)))
{
p += 2;
}
else if ((PL_bufend - p) >= 4
- && strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
+ && strEQs(p, "our") && isSPACE(*(p + 3)))
p += 3;
p = skipspace(p);
/* skip optional package name, as in "for my abc $x (..)" */
p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
p = skipspace(p);
}
- if (*p != '$')
+ if (*p != '$' && *p != '\\')
Perl_croak(aTHX_ "Missing $ on loop variable");
}
OPERATOR(FOR);
UNI(OP_LCFIRST);
case KEY_local:
- pl_yylval.ival = 0;
OPERATOR(LOCAL);
case KEY_length:
case KEY_my:
case KEY_state:
if (PL_in_my) {
+ PL_bufptr = s;
yyerror(Perl_form(aTHX_
"Can't redeclare \"%s\" in \"%s\"",
tmp == KEY_my ? "my" :
s = skipspace(s);
if (isIDFIRST_lazy_if(s,UTF)) {
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
- if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
- {
- if (!FEATURE_LEXSUBS_IS_ENABLED)
- Perl_croak(aTHX_
- "Experimental \"%s\" subs not enabled",
- tmp == KEY_my ? "my" :
- tmp == KEY_state ? "state" : "our");
- Perl_ck_warner_d(aTHX_
- packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
- "The lexical_subs feature is experimental");
+ if (len == 3 && strEQs(PL_tokenbuf, "sub"))
goto really_sub;
- }
PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
if (!PL_in_my_stash) {
char tmpbuf[1024];
yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
}
}
- pl_yylval.ival = 1;
+ else if (*s == '\\') {
+ if (!FEATURE_MYREF_IS_ENABLED)
+ Perl_croak(aTHX_ "The experimental declared_refs "
+ "feature is not enabled");
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
+ "Declaring references is experimental");
+ }
OPERATOR(MY);
case KEY_next:
LOP(OP_PACK,XTERM);
case KEY_package:
- s = force_word(s,WORD,FALSE,TRUE);
+ s = force_word(s,BAREWORD,FALSE,TRUE);
s = skipspace(s);
s = force_strict_version(s);
PREBLOCK(PACKAGE);
|| (s = force_version(s, TRUE), *s == 'v'))
{
*PL_tokenbuf = '\0';
- s = force_word(s,WORD,TRUE,TRUE);
+ s = force_word(s,BAREWORD,TRUE,TRUE);
if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
GV_ADD | (UTF ? SVf_UTF8 : 0));
checkcomma(s,PL_tokenbuf,"subroutine name");
s = skipspace(s);
PL_expect = XTERM;
- s = force_word(s,WORD,TRUE,TRUE);
+ s = force_word(s,BAREWORD,TRUE,TRUE);
LOP(OP_SORT,XREF);
case KEY_split:
const int key = tmp;
SV *format_name = NULL;
- d = s;
+ SSize_t off = s-SvPVX(PL_linestr);
s = skipspace(s);
+ d = SvPVX(PL_linestr)+off;
if (isIDFIRST_lazy_if(s,UTF)
|| *s == '\''
if (key == KEY_format) {
if (format_name) {
NEXTVAL_NEXTTOKE.opval
- = (OP*)newSVOP(OP_CONST,0, format_name);
+ = newSVOP(OP_CONST,0, format_name);
NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
- force_next(WORD);
+ force_next(BAREWORD);
}
PREBLOCK(FORMAT);
}
if (have_proto) {
NEXTVAL_NEXTTOKE.opval =
- (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
+ newSVOP(OP_CONST, 0, PL_lex_stuff);
PL_lex_stuff = NULL;
force_next(THING);
}
Looks up an identifier in the pad or in a package
+ is_sig indicates that this is a subroutine signature variable
+ rather than a plain pad var.
+
Returns:
PRIVATEREF if this is a lexical name.
- WORD if this belongs to a package.
+ BAREWORD if this belongs to a package.
Structure:
if we're in a my declaration
tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
}
else {
+ OP *o;
if (has_colon) {
/* "my" variable %s can't be in a package */
/* PL_no_myglob is constant */
GCC_DIAG_RESTORE;
}
- pl_yylval.opval = newOP(OP_PADANY, 0);
- pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
+ if (PL_in_my == KEY_sigvar) {
+ /* A signature 'padop' needs in addition, an op_first to
+ * point to a child sigdefelem, and an extra field to hold
+ * the signature index. We can achieve both by using an
+ * UNOP_AUX and (ab)using the op_aux field to hold the
+ * index. If we ever need more fields, use a real malloced
+ * aux strut instead.
+ */
+ o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
+ INT2PTR(UNOP_AUX_item *,
+ (PL_parser->sig_elems)));
+ o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
+ : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
+ : OPpARGELEM_HV);
+ }
+ else
+ o = newOP(OP_PADANY, 0);
+ o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
UTF ? SVf_UTF8 : 0);
+ if (PL_in_my == KEY_sigvar)
+ PL_in_my = 0;
+
+ pl_yylval.opval = o;
return PRIVATEREF;
}
}
SV * const sym = newSVhek(stashname);
sv_catpvs(sym, "::");
sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
+ pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
pl_yylval.opval->op_private = OPpCONST_ENTERED;
if (pit != '&')
gv_fetchsv(sym,
((PL_tokenbuf[0] == '$') ? SVt_PV
: (PL_tokenbuf[0] == '@') ? SVt_PVAV
: SVt_PVHV));
- return WORD;
+ return BAREWORD;
}
pl_yylval.opval = newOP(OP_PADANY, 0);
}
/*
- Whine if they've said @foo in a doublequoted string,
- and @foo isn't a variable we can find in the symbol
+ Whine if they've said @foo or @foo{key} in a doublequoted string,
+ and @foo (or %foo) isn't a variable we can find in the symbol
table.
*/
if (ckWARN(WARN_AMBIGUOUS)
&& !PL_lex_brackets)
{
GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
- ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
+ ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
+ SVt_PVAV);
if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
- /* DO NOT warn for @- and @+ */
- && !( PL_tokenbuf[2] == '\0'
- && ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
)
{
/* Downgraded from fatal to warning 20000522 mjd */
}
/* build ops for a bareword */
- pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
+ pl_yylval.opval = newSVOP(OP_CONST, 0,
newSVpvn_flags(PL_tokenbuf + 1,
tokenbuf_len - 1,
UTF ? SVf_UTF8 : 0 ));
((PL_tokenbuf[0] == '$') ? SVt_PV
: (PL_tokenbuf[0] == '@') ? SVt_PVAV
: SVt_PVHV));
- return WORD;
+ return BAREWORD;
}
STATIC void
}
PERL_STATIC_INLINE void
-S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
+S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
+ bool is_utf8, bool check_dollar) {
PERL_ARGS_ASSERT_PARSE_IDENT;
for (;;) {
* the code path that triggers the "Bad name after" warning
* when looking for barewords.
*/
- && (*s)[2] != '$') {
+ && !(check_dollar && (*s)[2] == '$')) {
*(*d)++ = *(*s)++;
*(*d)++ = *(*s)++;
}
PERL_ARGS_ASSERT_SCAN_WORD;
- parse_ident(&s, &d, e, allow_package, is_utf8);
+ parse_ident(&s, &d, e, allow_package, is_utf8, TRUE);
*d = '\0';
*slp = d - dest;
return s;
* 2) '{'
* The final case currently doesn't get this far in the program, so we
* don't test for it. If that were to change, it would be ok to allow it.
- * c) When not under Unicode rules, any upper Latin1 character
- * d) Otherwise, when unicode rules are used, all XIDS characters.
+ * b) When not under Unicode rules, any upper Latin1 character
+ * c) Otherwise, when unicode rules are used, all XIDS characters.
*
* Because all ASCII characters have the same representation whether
* encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
- * '{' without knowing if is UTF-8 or not.
- * EBCDIC already uses the rules that ASCII platforms will use after the
- * deprecation cycle; see comment below about the deprecation. */
-#ifdef EBCDIC
-# define VALID_LEN_ONE_IDENT(s, is_utf8) \
+ * '{' without knowing if is UTF-8 or not. */
+#define VALID_LEN_ONE_IDENT(s, is_utf8) \
(isGRAPH_A(*(s)) || ((is_utf8) \
? isIDFIRST_utf8((U8*) (s)) \
: (isGRAPH_L1(*s) \
&& LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
-#else
-# define VALID_LEN_ONE_IDENT(s, is_utf8) \
- (isGRAPH_A(*(s)) || ((is_utf8) \
- ? isIDFIRST_utf8((U8*) (s)) \
- : ! isASCII_utf8((U8*) (s))))
-#endif
STATIC char *
S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
}
}
else { /* See if it is a "normal" identifier */
- parse_ident(&s, &d, e, 1, is_utf8);
+ parse_ident(&s, &d, e, 1, is_utf8, FALSE);
}
*d = '\0';
d = dest;
|| isDIGIT_A((U8)s[1])
|| s[1] == '$'
|| s[1] == '{'
- || strnEQ(s+1,"::",2)) )
+ || strEQs(s+1,"::")) )
{
/* Dereferencing a value in a scalar variable.
The alternatives are different syntaxes for a scalar variable.
: 1)
&& VALID_LEN_ONE_IDENT(s, is_utf8))
{
- /* Deprecate all non-graphic characters. Include SHY as a non-graphic,
- * because often it has no graphic representation. (We can't get to
- * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
- * test for it.) */
- if ((is_utf8)
- ? ! isGRAPH_utf8( (U8*) s)
- : (! isGRAPH_L1( (U8) *s)
- || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
- {
- deprecate("literal non-graphic characters in variable names");
- }
-
if (is_utf8) {
const STRLEN skip = UTF8SKIP(s);
STRLEN i;
else if (ck_uni && bracket == -1)
check_uni();
if (bracket != -1) {
+ bool skip;
+ char *s2;
/* If we were processing {...} notation then... */
if (isIDFIRST_lazy_if(d,is_utf8)) {
/* if it starts as a valid identifier, assume that it is one.
(the later check for } being at the expected point will trap
cases where this doesn't pan out.) */
d += is_utf8 ? UTF8SKIP(d) : 1;
- parse_ident(&s, &d, e, 1, is_utf8);
+ parse_ident(&s, &d, e, 1, is_utf8, TRUE);
*d = '\0';
tmp_copline = CopLINE(PL_curcop);
if (s < PL_bufend && isSPACE(*s)) {
if ( !tmp_copline )
tmp_copline = CopLINE(PL_curcop);
- if (s < PL_bufend && isSPACE(*s)) {
- s = skipspace(s);
- }
+ if ((skip = s < PL_bufend && isSPACE(*s)))
+ /* Avoid incrementing line numbers or resetting PL_linestart,
+ in case we have to back up. */
+ s2 = peekspace(s);
+ else
+ s2 = s;
/* Expect to find a closing } after consuming any trailing whitespace.
*/
- if (*s == '}') {
+ if (*s2 == '}') {
+ /* Now increment line numbers if applicable. */
+ if (skip)
+ s = skipspace(s);
s++;
if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
PL_lex_state = LEX_INTERPEND;
"Use of /c modifier is meaningless without /g" );
}
- STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+ if (UNLIKELY((x_mod_count) > 1)) {
+ yyerror("Only one /x regex modifier is allowed");
+ }
PL_lex_op = (OP*)pm;
pl_yylval.ival = OP_MATCH;
PMOP *pm;
I32 first_start;
line_t first_line;
+ line_t linediff = 0;
I32 es = 0;
char charset = '\0'; /* character set modifier */
unsigned int x_mod_count = 0;
}
}
- STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+ if (UNLIKELY((x_mod_count) > 1)) {
+ yyerror("Only one /x regex modifier is allowed");
+ }
if ((pm->op_pmflags & PMf_CONTINUE)) {
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
sv_catpvs(repl, "do ");
}
sv_catpvs(repl, "{");
- sv_catsv(repl, PL_sublex_info.repl);
+ sv_catsv(repl, PL_parser->lex_sub_repl);
sv_catpvs(repl, "}");
- SvEVALED_on(repl);
- SvREFCNT_dec(PL_sublex_info.repl);
- PL_sublex_info.repl = repl;
- }
- if (CopLINE(PL_curcop) != first_line) {
- sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
- ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
- CopLINE(PL_curcop) - first_line;
+ ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen = 1;
+ SvREFCNT_dec(PL_parser->lex_sub_repl);
+ PL_parser->lex_sub_repl = repl;
+ es = 1;
+ }
+
+
+ linediff = CopLINE(PL_curcop) - first_line;
+ if (linediff)
CopLINE_set(PL_curcop, first_line);
+
+ if (linediff || es) {
+ /* the IVX field indicates that the replacement string is a s///e;
+ * the NVX field indicates how many src code lines the replacement
+ * spreads over */
+ sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
+ ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
+ ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen = es;
}
PL_lex_op = (OP*)pm;
o->op_private &= ~OPpTRANS_ALL;
o->op_private |= del|squash|complement|
(DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
- (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
+ (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF : 0);
PL_lex_op = o;
pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
SV *linestr;
char *bufend;
char * const olds = s;
- PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
+ PERL_CONTEXT * const cx = CX_CUR();
/* These two fields are not set until an inner lexing scope is
entered. But we need them set here. */
shared->ls_bufptr = s;
might think. Odd syntax errors like s;@{<<; can gobble up
the implicit semicolon at the end of a flie, causing the
file handle to be closed even when we are not in a string
- eval. So shared may be null in that case. */
+ eval. So shared may be null in that case.
+ (Closing '}' here to balance the earlier open brace for
+ editors that look for matched pairs.) */
if (UNLIKELY(!shared))
goto interminable;
/* A LEXSHARED struct with a null ls_prev pointer is the outer-
goto streaming;
}
}
- else { /* eval */
+ else { /* eval or we've already hit EOF */
s = (char*)memchr((void*)s, '\n', PL_bufend - s);
- assert(s);
+ if (!s)
+ goto interminable;
}
linestr = shared->ls_linestr;
bufend = SvEND(linestr);
&& cx->blk_eval.cur_text == linestr)
{
cx->blk_eval.cur_text = newSVsv(linestr);
- SvSCREAM_on(cx->blk_eval.cur_text);
+ cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
}
/* Copy everything from s onwards back to d. */
Move(s,d,bufend-s + 1,char);
else
{
SV *linestr_save;
+ char *oldbufptr_save;
+ char *oldoldbufptr_save;
streaming:
- sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
+ SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
term = PL_tokenbuf[1];
len--;
linestr_save = PL_linestr; /* must restore this afterwards */
d = s; /* and this */
+ oldbufptr_save = PL_oldbufptr;
+ oldoldbufptr_save = PL_oldoldbufptr;
PL_linestr = newSVpvs("");
PL_bufend = SvPVX(PL_linestr);
while (1) {
restore PL_linestr. */
SvREFCNT_dec_NN(PL_linestr);
PL_linestr = linestr_save;
+ PL_oldbufptr = oldbufptr_save;
+ PL_oldoldbufptr = oldoldbufptr_save;
goto interminable;
}
CopLINE_set(PL_curcop, origline);
PL_linestr = linestr_save;
PL_linestart = SvPVX(linestr_save);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_oldbufptr = oldbufptr_save;
+ PL_oldoldbufptr = oldoldbufptr_save;
s = d;
break;
}
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
SvUTF8_on(tmpstr);
- else if (IN_ENCODING)
- sv_recode_to_utf8(tmpstr, _get_encoding());
}
PL_lex_stuff = tmpstr;
pl_yylval.ival = op_type;
}
/* scan_inputsymbol
- takes: current position in input buffer
- returns: new position in input buffer
+ takes: position of first '<' in input buffer
+ returns: position of first char following the matching '>' in
+ input buffer
side-effects: pl_yylval and lex_op are set.
This code handles:
OP * const o = newOP(OP_PADSV, 0);
o->op_targ = tmp;
PL_lex_op = readline_overriden
- ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
+ ? newUNOP(OP_ENTERSUB, OPf_STACKED,
op_append_elem(OP_LIST, o,
newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
- : (OP*)newUNOP(OP_READLINE, 0, o);
+ : newUNOP(OP_READLINE, 0, o);
}
}
else {
GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
SVt_PV);
PL_lex_op = readline_overriden
- ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
+ ? newUNOP(OP_ENTERSUB, OPf_STACKED,
op_append_elem(OP_LIST,
newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
- : (OP*)newUNOP(OP_READLINE, 0,
+ : newUNOP(OP_READLINE, 0,
newUNOP(OP_RV2SV, 0,
newGVOP(OP_GV, 0, gv)));
}
else {
GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
PL_lex_op = readline_overriden
- ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
+ ? newUNOP(OP_ENTERSUB, OPf_STACKED,
op_append_elem(OP_LIST,
newGVOP(OP_GV, 0, gv),
newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
- : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
+ : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
pl_yylval.ival = OP_NULL;
}
}
char *to; /* current position in the sv's data */
I32 brackets = 1; /* bracket nesting level */
bool has_utf8 = FALSE; /* is there any utf8 content? */
- I32 termcode; /* terminating char. code */
+ IV termcode; /* terminating char. code */
U8 termstr[UTF8_MAXBYTES]; /* terminating string */
STRLEN termlen; /* length of terminating string */
- int last_off = 0; /* last position for nesting bracket */
line_t herelines;
PERL_ARGS_ASSERT_SCAN_STR;
/* mark where we are */
PL_multi_start = CopLINE(PL_curcop);
- PL_multi_open = term;
+ PL_multi_open = termcode;
herelines = PL_parser->herelines;
/* find corresponding closing delimiter */
if (term && (tmps = strchr("([{< )]}> )]}>",term)))
termcode = termstr[0] = term = tmps[5];
- PL_multi_close = term;
+ PL_multi_close = termcode;
if (PL_multi_open == PL_multi_close) {
keep_bracketed_quoted = FALSE;
sv_catpvn(sv, s, termlen);
s += termlen;
for (;;) {
- if (IN_ENCODING && !UTF && !re_reparse) {
- bool cont = TRUE;
-
- while (cont) {
- int offset = s - SvPVX_const(PL_linestr);
- const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
- &offset, (char*)termstr, termlen);
- const char *ns;
- char *svlast;
-
- if (SvIsCOW(PL_linestr)) {
- STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
- STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
- STRLEN last_lop_pos, re_eval_start_pos, s_pos;
- char *buf = SvPVX(PL_linestr);
- bufend_pos = PL_parser->bufend - buf;
- bufptr_pos = PL_parser->bufptr - buf;
- oldbufptr_pos = PL_parser->oldbufptr - buf;
- oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
- linestart_pos = PL_parser->linestart - buf;
- last_uni_pos = PL_parser->last_uni
- ? PL_parser->last_uni - buf
- : 0;
- last_lop_pos = PL_parser->last_lop
- ? PL_parser->last_lop - buf
- : 0;
- re_eval_start_pos =
- PL_parser->lex_shared->re_eval_start ?
- PL_parser->lex_shared->re_eval_start - buf : 0;
- s_pos = s - buf;
-
- sv_force_normal(PL_linestr);
-
- buf = SvPVX(PL_linestr);
- PL_parser->bufend = buf + bufend_pos;
- PL_parser->bufptr = buf + bufptr_pos;
- PL_parser->oldbufptr = buf + oldbufptr_pos;
- PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
- PL_parser->linestart = buf + linestart_pos;
- if (PL_parser->last_uni)
- PL_parser->last_uni = buf + last_uni_pos;
- if (PL_parser->last_lop)
- PL_parser->last_lop = buf + last_lop_pos;
- if (PL_parser->lex_shared->re_eval_start)
- PL_parser->lex_shared->re_eval_start =
- buf + re_eval_start_pos;
- s = buf + s_pos;
- }
- ns = SvPVX_const(PL_linestr) + offset;
- svlast = SvEND(sv) - 1;
-
- for (; s < ns; s++) {
- if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
- COPLINE_INC_WITH_HERELINES;
- }
- if (!found)
- goto read_more_line;
- else {
- /* handle quoted delimiters */
- if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
- const char *t;
- for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
- t--;
- if ((svlast-1 - t) % 2) {
- if (!keep_bracketed_quoted) {
- *(svlast-1) = term;
- *svlast = '\0';
- SvCUR_set(sv, SvCUR(sv) - 1);
- }
- continue;
- }
- }
- if (PL_multi_open == PL_multi_close) {
- cont = FALSE;
- }
- else {
- const char *t;
- char *w;
- for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
- /* At here, all closes are "was quoted" one,
- so we don't check PL_multi_close. */
- if (*t == '\\') {
- if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
- t++;
- else
- *w++ = *t++;
- }
- else if (*t == PL_multi_open)
- brackets++;
-
- *w = *t;
- }
- if (w < t) {
- *w++ = term;
- *w = '\0';
- SvCUR_set(sv, w - SvPVX_const(sv));
- }
- last_off = w - SvPVX(sv);
- if (--brackets <= 0)
- cont = FALSE;
- }
- }
- }
- if (!keep_delims) {
- SvCUR_set(sv, SvCUR(sv) - 1);
- *SvEND(sv) = '\0';
- }
- break;
- }
-
/* extend sv if need be */
SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
/* set 'to' to the next character in the sv's string */
/* backslashes can escape the open or closing characters */
if (*s == '\\' && s+1 < PL_bufend) {
if (!keep_bracketed_quoted
- && ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
+ && ( ((UV)s[1] == PL_multi_open)
+ || ((UV)s[1] == PL_multi_close) ))
{
s++;
}
*to++ = *s++;
}
/* allow nested opens and closes */
- else if (*s == PL_multi_close && --brackets <= 0)
+ else if ((UV)*s == PL_multi_close && --brackets <= 0)
break;
- else if (*s == PL_multi_open)
+ else if ((UV)*s == PL_multi_open)
brackets++;
else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
has_utf8 = TRUE;
to[-1] = '\n';
#endif
- read_more_line:
/* if we're out of file, or a read fails, bail and reset the current
line marker so we can report where the unterminated string began
*/
/* at this point, we have successfully read the delimited string */
- if (!IN_ENCODING || UTF || re_reparse) {
-
- if (keep_delims)
+ if (keep_delims)
sv_catpvn(sv, s, termlen);
- s += termlen;
- }
- if (has_utf8 || (IN_ENCODING && !re_reparse))
+ s += termlen;
+
+ if (has_utf8)
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
*/
if (PL_lex_stuff)
- PL_sublex_info.repl = sv;
+ PL_parser->lex_sub_repl = sv;
else
PL_lex_stuff = sv;
if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
* multiple fp operations. */
bool hexfp = FALSE;
int total_bits = 0;
+ int significant_bits = 0;
#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
# define HEXFP_UQUAD
Uquad_t hexfp_uquad = 0;
#endif
NV hexfp_mult = 1.0;
UV high_non_zero = 0; /* highest digit */
+ int non_zero_integer_digits = 0;
PERL_ARGS_ASSERT_SCAN_NUM;
if (high_non_zero == 0 && b > 0)
high_non_zero = b;
+ if (high_non_zero)
+ non_zero_integer_digits++;
+
/* this could be hexfp, but peek ahead
* to avoid matching ".." */
if (UNLIKELY(HEXFP_PEEK(s))) {
* detection will shortly be more thorough with the
* underbar checks. */
const char* h = s;
+ significant_bits = non_zero_integer_digits * shift;
#ifdef HEXFP_UQUAD
hexfp_uquad = u;
#else /* HEXFP_NV */
hexfp_nv = u;
#endif
+ /* Ignore the leading zero bits of
+ * the high (first) non-zero digit. */
+ if (high_non_zero) {
+ if (high_non_zero < 0x8)
+ significant_bits--;
+ if (high_non_zero < 0x4)
+ significant_bits--;
+ if (high_non_zero < 0x2)
+ significant_bits--;
+ }
+
if (*h == '.') {
#ifdef HEXFP_NV
- NV mult = 1 / 16.0;
+ NV nv_mult = 1.0;
#endif
- h++;
- while (isXDIGIT(*h) || *h == '_') {
+ bool accumulate = TRUE;
+ for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
if (isXDIGIT(*h)) {
U8 b = XDIGIT_VALUE(*h);
- total_bits += shift;
+ significant_bits += shift;
#ifdef HEXFP_UQUAD
- hexfp_uquad <<= shift;
- hexfp_uquad |= b;
- hexfp_frac_bits += shift;
+ if (accumulate) {
+ if (significant_bits < NV_MANT_DIG) {
+ /* We are in the long "run" of xdigits,
+ * accumulate the full four bits. */
+ hexfp_uquad <<= shift;
+ hexfp_uquad |= b;
+ hexfp_frac_bits += shift;
+ } else {
+ /* We are at a hexdigit either at,
+ * or straddling, the edge of mantissa.
+ * We will try grabbing as many as
+ * possible bits. */
+ int tail =
+ significant_bits - NV_MANT_DIG;
+ if (tail <= 0)
+ tail += shift;
+ hexfp_uquad <<= tail;
+ hexfp_uquad |= b >> (shift - tail);
+ hexfp_frac_bits += tail;
+
+ /* Ignore the trailing zero bits
+ * of the last non-zero xdigit.
+ *
+ * The assumption here is that if
+ * one has input of e.g. the xdigit
+ * eight (0x8), there is only one
+ * bit being input, not the full
+ * four bits. Conversely, if one
+ * specifies a zero xdigit, the
+ * assumption is that one really
+ * wants all those bits to be zero. */
+ if (b) {
+ if ((b & 0x1) == 0x0) {
+ significant_bits--;
+ if ((b & 0x2) == 0x0) {
+ significant_bits--;
+ if ((b & 0x4) == 0x0) {
+ significant_bits--;
+ }
+ }
+ }
+ }
+
+ accumulate = FALSE;
+ }
+ } else {
+ /* Keep skipping the xdigits, and
+ * accumulating the significant bits,
+ * but do not shift the uquad
+ * (which would catastrophically drop
+ * high-order bits) or accumulate the
+ * xdigits anymore. */
+ }
#else /* HEXFP_NV */
- hexfp_nv += b * mult;
- mult /= 16.0;
+ if (accumulate) {
+ nv_mult /= 16.0;
+ if (nv_mult > 0.0)
+ hexfp_nv += b * nv_mult;
+ else
+ accumulate = FALSE;
+ }
#endif
}
- h++;
+ if (significant_bits >= NV_MANT_DIG)
+ accumulate = FALSE;
}
}
- if (total_bits >= 4) {
- if (high_non_zero < 0x8)
- total_bits--;
- if (high_non_zero < 0x4)
- total_bits--;
- if (high_non_zero < 0x2)
- total_bits--;
- }
-
- if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) {
+ if ((total_bits > 0 || significant_bits > 0) &&
+ isALPHA_FOLD_EQ(*h, 'p')) {
bool negexp = FALSE;
h++;
if (*h == '+')
#ifdef NV_MIN_EXP
if (negexp
&& -hexfp_exp < NV_MIN_EXP - 1) {
+ /* NOTE: this means that the exponent
+ * underflow warning happens for
+ * the IEEE 754 subnormals (denormals),
+ * because DBL_MIN_EXP etc are the lowest
+ * possible binary (or, rather, DBL_RADIX-base)
+ * exponent for normals, not subnormals.
+ *
+ * This may or may not be a good thing. */
Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
"Hexadecimal float: exponent underflow");
break;
#ifdef HEXFP_UQUAD
hexfp_exp -= hexfp_frac_bits;
#endif
- hexfp_mult = pow(2.0, hexfp_exp);
+ hexfp_mult = Perl_pow(2.0, hexfp_exp);
hexfp = TRUE;
goto decimal;
}
*d = '\0';
if (UNLIKELY(hexfp)) {
# ifdef NV_MANT_DIG
- if (total_bits > NV_MANT_DIG)
+ if (significant_bits > NV_MANT_DIG)
Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
"Hexadecimal float: mantissa overflow");
# endif
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
SvUTF8_on(stuff);
- else if (IN_ENCODING)
- sv_recode_to_utf8(stuff, _get_encoding());
}
- NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
+ NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
force_next(THING);
}
else {
else if (yychar > 255)
sv_catpvs(where_sv, "next token ???");
else if (yychar == YYEMPTY) {
- if ( PL_lex_state == LEX_NORMAL
- || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
+ if (PL_lex_state == LEX_NORMAL)
sv_catpvs(where_sv, "at end of line");
else if (PL_lex_inpat)
sv_catpvs(where_sv, "within pattern");
PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
- sv_setpvs(filter, "");
+ SvPVCLEAR(filter);
IoLINES(filter) = reversed;
IoPAGE(filter) = 1; /* Not EOF */
if (*s == 'v')
s++; /* get past 'v' */
- sv_setpvs(sv, "");
+ SvPVCLEAR(sv);
for (;;) {
/* this is atoi() that tolerates underscores */
"Integer overflow in decimal number");
}
}
-#ifdef EBCDIC
- if (rev > 0x7FFFFFFF)
- Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
-#endif
+
/* Append native character for the rev point */
tmpend = uvchr_to_utf8(tmpbuf, rev);
sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
{
if (flags & ~PARSE_OPTIONAL)
Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
- if (PL_lex_state == LEX_KNOWNEXT) {
+ if (PL_nexttoke) {
PL_parser->yychar = yylex();
if (PL_parser->yychar == LABEL) {
char * const lpv = pl_yylval.pval;
if (!isIDFIRST_lazy_if(s, UTF))
goto no_label;
t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
- if (word_takes_any_delimeter(s, wlen))
+ if (word_takes_any_delimiter(s, wlen))
goto no_label;
bufptr_pos = s - SvPVX(PL_linestr);
PL_bufptr = t;
return stmtseqop;
}
-#define lex_token_boundary() S_lex_token_boundary(aTHX)
-static void
-S_lex_token_boundary(pTHX)
-{
- PL_oldoldbufptr = PL_oldbufptr;
- PL_oldbufptr = PL_bufptr;
-}
-
-#define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
-static OP *
-S_parse_opt_lexvar(pTHX)
-{
- I32 sigil, c;
- char *s, *d;
- OP *var;
- lex_token_boundary();
- sigil = lex_read_unichar(0);
- if (lex_peek_unichar(0) == '#') {
- qerror(Perl_mess(aTHX_ "Parse error"));
- return NULL;
- }
- lex_read_space(0);
- c = lex_peek_unichar(0);
- if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
- return NULL;
- s = PL_bufptr;
- d = PL_tokenbuf + 1;
- PL_tokenbuf[0] = (char)sigil;
- parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
- PL_bufptr = s;
- if (d == PL_tokenbuf+1)
- return NULL;
- var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
- OPf_MOD | (OPpLVAL_INTRO<<8));
- var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
- return var;
-}
-
-OP *
-Perl_parse_subsignature(pTHX)
-{
- I32 c;
- int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
- OP *initops = NULL;
- lex_read_space(0);
- c = lex_peek_unichar(0);
- while (c != /*(*/')') {
- switch (c) {
- case '$': {
- OP *var, *expr;
- if (prev_type == 2)
- qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
- var = parse_opt_lexvar();
- expr = var ?
- newBINOP(OP_AELEM, 0,
- ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
- OP_RV2AV),
- newSVOP(OP_CONST, 0, newSViv(pos))) :
- NULL;
- lex_read_space(0);
- c = lex_peek_unichar(0);
- if (c == '=') {
- lex_token_boundary();
- lex_read_unichar(0);
- lex_read_space(0);
- c = lex_peek_unichar(0);
- if (c == ',' || c == /*(*/')') {
- if (var)
- qerror(Perl_mess(aTHX_ "Optional parameter "
- "lacks default expression"));
- } else {
- OP *defexpr = parse_termexpr(0);
- if (defexpr->op_type == OP_UNDEF
- && !(defexpr->op_flags & OPf_KIDS))
- {
- op_free(defexpr);
- } else {
- OP *ifop =
- newBINOP(OP_GE, 0,
- scalar(newUNOP(OP_RV2AV, 0,
- newGVOP(OP_GV, 0, PL_defgv))),
- newSVOP(OP_CONST, 0, newSViv(pos+1)));
- expr = var ?
- newCONDOP(0, ifop, expr, defexpr) :
- newLOGOP(OP_OR, 0, ifop, defexpr);
- }
- }
- prev_type = 1;
- } else {
- if (prev_type == 1)
- qerror(Perl_mess(aTHX_ "Mandatory parameter "
- "follows optional parameter"));
- prev_type = 0;
- min_arity = pos + 1;
- }
- if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
- if (expr)
- initops = op_append_list(OP_LINESEQ, initops,
- newSTATEOP(0, NULL, expr));
- max_arity = ++pos;
- } break;
- case '@':
- case '%': {
- OP *var;
- if (prev_type == 2)
- qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
- var = parse_opt_lexvar();
- if (c == '%') {
- OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
- newBINOP(OP_BIT_AND, 0,
- scalar(newUNOP(OP_RV2AV, 0,
- newGVOP(OP_GV, 0, PL_defgv))),
- newSVOP(OP_CONST, 0, newSViv(1))),
- op_convert_list(OP_DIE, 0,
- op_convert_list(OP_SPRINTF, 0,
- op_append_list(OP_LIST,
- newSVOP(OP_CONST, 0,
- newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
- newSLICEOP(0,
- op_append_list(OP_LIST,
- newSVOP(OP_CONST, 0, newSViv(1)),
- newSVOP(OP_CONST, 0, newSViv(2))),
- newOP(OP_CALLER, 0))))));
- if (pos != min_arity)
- chkop = newLOGOP(OP_AND, 0,
- newBINOP(OP_GT, 0,
- scalar(newUNOP(OP_RV2AV, 0,
- newGVOP(OP_GV, 0, PL_defgv))),
- newSVOP(OP_CONST, 0, newSViv(pos))),
- chkop);
- initops = op_append_list(OP_LINESEQ,
- newSTATEOP(0, NULL, chkop),
- initops);
- }
- if (var) {
- OP *slice = pos ?
- op_prepend_elem(OP_ASLICE,
- newOP(OP_PUSHMARK, 0),
- newLISTOP(OP_ASLICE, 0,
- list(newRANGE(0,
- newSVOP(OP_CONST, 0, newSViv(pos)),
- newUNOP(OP_AV2ARYLEN, 0,
- ref(newUNOP(OP_RV2AV, 0,
- newGVOP(OP_GV, 0, PL_defgv)),
- OP_AV2ARYLEN)))),
- ref(newUNOP(OP_RV2AV, 0,
- newGVOP(OP_GV, 0, PL_defgv)),
- OP_ASLICE))) :
- newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
- initops = op_append_list(OP_LINESEQ, initops,
- newSTATEOP(0, NULL,
- newASSIGNOP(OPf_STACKED, var, 0, slice)));
- }
- prev_type = 2;
- max_arity = -1;
- } break;
- default:
- parse_error:
- qerror(Perl_mess(aTHX_ "Parse error"));
- return NULL;
- }
- lex_read_space(0);
- c = lex_peek_unichar(0);
- switch (c) {
- case /*(*/')': break;
- case ',':
- do {
- lex_token_boundary();
- lex_read_unichar(0);
- lex_read_space(0);
- c = lex_peek_unichar(0);
- } while (c == ',');
- break;
- default:
- goto parse_error;
- }
- }
- if (min_arity != 0) {
- initops = op_append_list(OP_LINESEQ,
- newSTATEOP(0, NULL,
- newLOGOP(OP_OR, 0,
- newBINOP(OP_GE, 0,
- scalar(newUNOP(OP_RV2AV, 0,
- newGVOP(OP_GV, 0, PL_defgv))),
- newSVOP(OP_CONST, 0, newSViv(min_arity))),
- op_convert_list(OP_DIE, 0,
- op_convert_list(OP_SPRINTF, 0,
- op_append_list(OP_LIST,
- newSVOP(OP_CONST, 0,
- newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
- newSLICEOP(0,
- op_append_list(OP_LIST,
- newSVOP(OP_CONST, 0, newSViv(1)),
- newSVOP(OP_CONST, 0, newSViv(2))),
- newOP(OP_CALLER, 0))))))),
- initops);
- }
- if (max_arity != -1) {
- initops = op_append_list(OP_LINESEQ,
- newSTATEOP(0, NULL,
- newLOGOP(OP_OR, 0,
- newBINOP(OP_LE, 0,
- scalar(newUNOP(OP_RV2AV, 0,
- newGVOP(OP_GV, 0, PL_defgv))),
- newSVOP(OP_CONST, 0, newSViv(max_arity))),
- op_convert_list(OP_DIE, 0,
- op_convert_list(OP_SPRINTF, 0,
- op_append_list(OP_LIST,
- newSVOP(OP_CONST, 0,
- newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
- newSLICEOP(0,
- op_append_list(OP_LIST,
- newSVOP(OP_CONST, 0, newSViv(1)),
- newSVOP(OP_CONST, 0, newSViv(2))),
- newOP(OP_CALLER, 0))))))),
- initops);
- }
- return initops;
-}
-
/*
* ex: set ts=8 sts=4 sw=4 et:
*/