#include "dquote_inline.h"
#include "invlist_inline.h"
-#define new_constant(a,b,c,d,e,f,g) \
- S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
+#define new_constant(a,b,c,d,e,f,g, h) \
+ S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
#define pl_yylval (PL_parser->yylval)
{ GIVEN, TOKENTYPE_IVAL, "GIVEN" },
{ HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
{ IF, TOKENTYPE_IVAL, "IF" },
- { LABEL, TOKENTYPE_PVAL, "LABEL" },
+ { LABEL, TOKENTYPE_OPVAL, "LABEL" },
{ LOCAL, TOKENTYPE_IVAL, "LOCAL" },
{ LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
{ LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
if (flags & LEX_STUFF_UTF8) {
goto plain_copy;
} else {
- STRLEN highhalf = 0; /* Count of variants */
- const char *p, *e = pv+len;
- for (p = pv; p != e; p++) {
- if (! UTF8_IS_INVARIANT(*p)) {
- highhalf++;
- }
- }
+ STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
+ (U8 *) pv + len);
+ const char *p, *e = pv+len;;
if (!highhalf)
goto plain_copy;
lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
}
else if (GvAV(cfgv)) {
AV * const av = GvAV(cfgv);
- const I32 start = CopLINE(PL_curcop)+1;
- I32 items = AvFILLp(av) - start;
+ const line_t start = CopLINE(PL_curcop)+1;
+ SSize_t items = AvFILLp(av) - start;
if (items > 0) {
AV * const av2 = GvAVn(gv2);
SV **svp = AvARRAY(av) + start;
- I32 l = (I32)line_num+1;
- while (items--)
- av_store(av2, l++, SvREFCNT_inc(*svp++));
+ Size_t l = line_num+1;
+ while (items-- && l < SSize_t_MAX && l == (line_t)l)
+ av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
}
}
}
SV * const sv = newSVpvn_utf8(start, len,
! IN_BYTES
&& UTF
+ && len != 0
&& is_utf8_non_invariant_string((const U8*)start, len));
return sv;
}
SvCUR_set(sv, d - SvPVX_const(sv));
finish:
if ( PL_hints & HINT_NEW_STRING )
- return new_constant(NULL, 0, "q", sv, pv, "q", 1);
+ return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
return sv;
}
const line_t l = CopLINE(PL_curcop);
LEAVE;
if (PL_parser->sub_error_count != PL_error_count) {
- const char * const name = OutCopFILE(PL_curcop);
if (PL_parser->sub_no_recover) {
- const char * msg = "";
- if (PL_in_eval) {
- SV *errsv = ERRSV;
- if (SvCUR(ERRSV)) {
- msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
- }
- }
- abort_execution(msg, name);
+ yyquit();
NOT_REACHED;
}
}
}
STATIC SV*
-S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
+S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
+{
+ /* This justs wraps get_and_check_backslash_N_name() to output any error
+ * message it returns. */
+
+ const char * error_msg = NULL;
+ SV * result;
+
+ PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
+
+ /* charnames doesn't work well if there have been errors found */
+ if (PL_error_count > 0) {
+ return NULL;
+ }
+
+ result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
+
+ if (error_msg) {
+ yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
+ }
+
+ return result;
+}
+
+SV*
+Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
+ const char* const e,
+ const bool is_utf8,
+ const char ** error_msg)
{
/* <s> points to first character of interior of \N{}, <e> to one beyond the
* interior, hence to the "}". Finds what the name resolves to, returning
- * an SV* containing it; NULL if no valid one found */
-
- SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
+ * an SV* containing it; NULL if no valid one found.
+ *
+ * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
+ * doesn't have to be. */
+ SV* res;
HV * table;
SV **cvp;
SV *cv;
SV *rv;
HV *stash;
const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
+ dVAR;
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
+ assert(e >= s);
+ assert(s > (char *) 3);
+
+ res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
+
if (!SvCUR(res)) {
SvREFCNT_dec_NN(res);
/* diag_listed_as: Unknown charname '%s' */
- yyerror("Unknown charname ''");
+ *error_msg = Perl_form(aTHX_ "Unknown charname ''");
return NULL;
}
res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
/* include the <}> */
- e - backslash_ptr + 1);
+ e - backslash_ptr + 1, error_msg);
if (! SvPOK(res)) {
SvREFCNT_dec_NN(res);
return NULL;
* characters that begin a character name alias are alphabetic, otherwise
* would have to create a isCHARNAME_BEGIN macro */
- if (! UTF) {
+ if (! is_utf8) {
if (! isALPHAU(*s)) {
goto bad_charname;
}
/* diag_listed_as: charnames alias definitions may not contain
trailing white-space; marked by <-- HERE in %s
*/
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"charnames alias definitions may not contain trailing "
"white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
- if (SvUTF8(res)) { /* Don't accept malformed input */
+ if (SvUTF8(res)) { /* Don't accept malformed charname value */
const U8* first_bad_char_loc;
STRLEN len;
const char* const str = SvPV_const(res, len);
0 /* 0 means don't die */ );
/* diag_listed_as: Malformed UTF-8 returned by \N{%s}
immediately after '%s' */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"Malformed UTF-8 returned by %.*s immediately after '%.*s'",
(int) (e - backslash_ptr + 1), backslash_ptr,
- (int) ((char *) first_bad_char_loc - str), str
- ),
- SVf_UTF8);
+ (int) ((char *) first_bad_char_loc - str), str);
return NULL;
}
}
* that this print won't run off the end of the string */
/* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
in \N{%s} */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
/* diag_listed_as: charnames alias definitions may not contain a
sequence of multiple spaces; marked by <-- HERE
in %s */
- yyerror_pv(
- Perl_form(aTHX_
+ *error_msg = Perl_form(aTHX_
"charnames alias definitions may not contain a sequence of "
"multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
(int)(s - backslash_ptr + 1), backslash_ptr,
- (int)(e - s + 1), s + 1
- ),
- UTF ? SVf_UTF8 : 0);
+ (int)(e - s + 1), s + 1);
return NULL;
}
bool dorange = FALSE; /* are we in a translit range? */
bool didrange = FALSE; /* did we just finish a range? */
bool in_charclass = FALSE; /* within /[...]/ */
- bool has_utf8 = FALSE; /* Output constant is UTF8 */
- bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
+ bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
+ bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
UTF8? But, this can show as true
when the source isn't utf8, as for
example when it is entirely composed
assert(PL_lex_inwhat != OP_TRANSR);
if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
/* If we are doing a trans and we know we want UTF8 set expectation */
- has_utf8 = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
- this_utf8 = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
+ d_is_utf8 = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
+ s_is_utf8 = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
}
/* Protect sv from errors and fatal warnings. */
* occurences in the constant, except those added by a
* backslash escape sequence, like \x{100}. Mostly, those
* set 'has_above_latin1' as appropriate */
- if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+ if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
has_above_latin1 = TRUE;
}
* time through the loop */
offset_to_max = d - SvPVX_const(sv);
- if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+ if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
has_above_latin1 = TRUE;
}
IV real_range_max = 0;
#endif
/* Get the code point values of the range ends. */
- if (has_utf8) {
+ if (d_is_utf8) {
/* We know the utf8 is valid, because we just constructed
* it ourselves in previous loop iterations */
min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
* get it out of the way now.) */
if (UNLIKELY(range_max == range_min)) {
d = max_ptr;
- if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
+ if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
utf8_variant_count--;
}
goto range_done;
/* Here the range contains at least 3 code points */
- if (has_utf8) {
+ if (d_is_utf8) {
/* If everything in the transliteration is below 256, we
* can avoid special handling later. A translation table
* */
grow = (range_max - 1) - (range_min + 1) + 1;
- if (has_utf8) {
+ if (d_is_utf8) {
#ifdef EBCDIC
/* In some cases in EBCDIC, we haven't yet calculated a
* precise amount needed for the UTF-8 variants. Just
/* Recall that the min and max are now in Unicode terms, so
* we have to convert each character to its native
* equivalent */
- if (has_utf8) {
+ if (d_is_utf8) {
for (i = range_min; i <= range_max; i++) {
append_utf8_from_native_byte(
LATIN1_TO_NATIVE((U8) i),
/* Here, no conversions are necessary, which means that the
* first character in the range is already in 'd' and
* valid, so we can skip overwriting it */
- if (has_utf8) {
+ if (d_is_utf8) {
SSize_t i;
d += UTF8SKIP(d);
for (i = range_min + 1; i <= range_max; i++) {
{
const char* error;
- bool valid = grok_bslash_o(&s, PL_bufend,
+ bool valid = grok_bslash_o(&s, send,
&uv, &error,
TRUE, /* Output warning */
FALSE, /* Not strict */
{
const char* error;
- bool valid = grok_bslash_x(&s, PL_bufend,
+ bool valid = grok_bslash_x(&s, send,
&uv, &error,
TRUE, /* Output warning */
FALSE, /* Not strict */
*d++ = (char) uv;
}
else {
- if (!has_utf8 && uv > 255) {
+ if (!d_is_utf8 && uv > 255) {
/* Here, 'uv' won't fit unless we convert to UTF-8.
* If we've only seen invariants so far, all we have to
}
has_above_latin1 = TRUE;
- has_utf8 = TRUE;
+ d_is_utf8 = TRUE;
}
- if (! has_utf8) {
+ if (! d_is_utf8) {
*d++ = (char)uv;
utf8_variant_count++;
}
* tr/// doesn't care about Unicode rules, so no need
* there to upgrade to UTF-8 for small enough code
* points */
- if (! has_utf8 && ( uv > 0xFF
+ if (! d_is_utf8 && ( uv > 0xFF
|| PL_lex_inwhat != OP_TRANS))
{
/* See Note on sizing above. */
d = SvPVX(sv) + SvCUR(sv);
}
- has_utf8 = TRUE;
+ d_is_utf8 = TRUE;
has_above_latin1 = TRUE;
}
/* Add the (Unicode) code point to the output. */
- if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
+ if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
*d++ = (char) LATIN1_TO_NATIVE(uv);
}
else {
}
}
else /* Here is \N{NAME} but not \N{U+...}. */
- if ((res = get_and_check_backslash_N_name(s, e)))
- {
+ if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
+ { /* Failed. We should die eventually, but for now use a NUL
+ to keep parsing */
+ *d++ = '\0';
+ }
+ else { /* Successfully evaluated the name */
STRLEN len;
const char *str = SvPV_const(res, len);
if (PL_lex_inpat) {
if (! len) { /* The name resolved to an empty string */
- Copy("\\N{}", d, 4, char);
- d += 4;
+ const char empty_N[] = "\\N{_}";
+ Copy(empty_N, d, sizeof(empty_N) - 1, char);
+ d += sizeof(empty_N) - 1;
}
else {
/* In order to not lose information for the regex
/* Upgrade destination to be utf8 if this new
* component is */
- if (! has_utf8 && SvUTF8(res)) {
+ if (! d_is_utf8 && SvUTF8(res)) {
/* See Note on sizing above. */
const STRLEN extra = len + (send - s) + 1;
extra);
d = SvPVX(sv) + SvCUR(sv);
}
- has_utf8 = TRUE;
+ d_is_utf8 = TRUE;
} else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
/* See Note on sizing above. (NOTE: SvCUR() is not
if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
*d++ = *s++;
}
- else if (! this_utf8 && ! has_utf8) {
+ else if (! s_is_utf8 && ! d_is_utf8) {
/* If neither source nor output is UTF-8, is also a single byte,
* just copy it; but this byte counts should we later have to
* convert to UTF-8 */
*d++ = *s++;
utf8_variant_count++;
}
- else if (this_utf8 && has_utf8) { /* Both UTF-8, can just copy */
+ else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
const STRLEN len = UTF8SKIP(s);
/* We expect the source to have already been checked for
}
else { /* UTF8ness matters and doesn't match, need to convert */
STRLEN len = 1;
- const UV nextuv = (this_utf8)
+ const UV nextuv = (s_is_utf8)
? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
: (UV) ((U8) *s);
STRLEN need = UVCHR_SKIP(nextuv);
- if (!has_utf8) {
+ if (!d_is_utf8) {
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
need);
d = SvPVX(sv) + SvCUR(sv);
}
- has_utf8 = TRUE;
+ d_is_utf8 = TRUE;
} else if (need > len) {
/* encoded value larger than old, may need extra space (NOTE:
* SvCUR() is not set correctly here). See Note on sizing
" >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
SvPOK_on(sv);
- if (has_utf8) {
+ if (d_is_utf8) {
SvUTF8_on(sv);
if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
PL_parser->lex_sub_op->op_private |=
}
sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
- type, typelen);
+ type, typelen, NULL);
}
pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
}
return yylex();
case LEX_FORMLINE:
+ if (PL_parser->sub_error_count != PL_error_count) {
+ /* There was an error parsing a formline, which tends to
+ mess up the parser.
+ Unlike interpolated sub-parsing, we can't treat any of
+ these as recoverable, so no need to check sub_no_recover.
+ */
+ yyquit();
+ }
assert(PL_lex_formbrack);
s = scan_formline(PL_bufptr);
if (!PL_lex_formbrack)
SAVEI32(PL_lex_formbrack);
PL_parser->form_lex_state = PL_lex_state;
PL_lex_formbrack = PL_lex_brackets + 1;
+ PL_parser->sub_error_count = PL_error_count;
goto leftbracket;
}
}
if (!anydelim && PL_expect == XSTATE
&& d < PL_bufend && *d == ':' && *(d + 1) != ':') {
s = d + 1;
- pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
- pl_yylval.pval[len] = '\0';
- pl_yylval.pval[len+1] = UTF ? 1 : 0;
+ pl_yylval.opval =
+ newSVOP(OP_CONST, 0,
+ newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
CLINE;
TOKEN(LABEL);
}
/* Look for a prototype */
if (*s == '(' && !is_sigsub) {
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
- COPLINE_SET_FROM_MULTI_END;
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
+ COPLINE_SET_FROM_MULTI_END;
(void)validate_proto(PL_subname, PL_lex_stuff,
ckWARN(WARN_ILLEGALPROTO), 0);
have_proto = TRUE;
Best used as sv=new_constant(..., sv, ...).
If s, pv are NULL, calls subroutine with one argument,
and <type> is used with error messages only.
- <type> is assumed to be well formed UTF-8 */
+ <type> is assumed to be well formed UTF-8.
+
+ If error_msg is not NULL, *error_msg will be set to any error encountered.
+ Otherwise yyerror() will be used to output it */
STATIC SV *
S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
- SV *sv, SV *pv, const char *type, STRLEN typelen)
+ SV *sv, SV *pv, const char *type, STRLEN typelen,
+ const char ** error_msg)
{
dSP;
HV * table = GvHV(PL_hintgv); /* ^H */
if (*key == 'c') { assert (strEQ(key, "charnames")); }
assert(type || s);
- /* charnames doesn't work well if there have been errors found */
- if (PL_error_count > 0 && *key == 'c')
- {
- SvREFCNT_dec_NN(sv);
- return &PL_sv_undef;
- }
-
sv_2mortal(sv); /* Parent created it permanently */
if (!table
|| ! (PL_hints & HINT_LOCALIZE_HH)
(type ? type: s), why1, why2, why3);
}
}
- yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+ if (error_msg) {
+ *error_msg = msg;
+ }
+ else {
+ yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+ }
return SvREFCNT_inc_simple_NN(sv);
}
now_ok:
e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
*PL_tokenbuf = '\n';
peek = s;
+
if (*peek == '~') {
indented = TRUE;
peek++; s++;
}
+
while (SPACE_OR_TAB(*peek))
peek++;
+
if (*peek == '`' || *peek == '\'' || *peek =='"') {
s = peek;
term = *s++;
s++, term = '\'';
else
term = '"';
+
if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
+
peek = s;
+
while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
peek += UTF ? UTF8SKIP(peek) : 1;
}
+
len = (peek - s >= e - d) ? (e - d) : (peek - s);
Copy(s, d, len, char);
s += len;
d += len;
}
+
if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
Perl_croak(aTHX_ "Delimiter for here document is too long");
+
*d++ = '\n';
*d = '\0';
len = d - PL_tokenbuf;
PL_multi_start = origline + 1 + PL_parser->herelines;
PL_multi_open = PL_multi_close = '<';
+
/* inside a string eval or quote-like operator */
if (!infile || PL_lex_inwhat) {
SV *linestr;
entered. But we need them set here. */
shared->ls_bufptr = s;
shared->ls_linestr = PL_linestr;
- if (PL_lex_inwhat)
- /* Look for a newline. If the current buffer does not have one,
- peek into the line buffer of the parent lexing scope, going
- up as many levels as necessary to find one with a newline
- after bufptr.
- */
- while (!(s = (char *)memchr(
- (void *)shared->ls_bufptr, '\n',
- SvEND(shared->ls_linestr)-shared->ls_bufptr
- ))) {
- shared = shared->ls_prev;
- /* shared is only null if we have gone beyond the outermost
- lexing scope. In a file, we will have broken out of the
- loop in the previous iteration. In an eval, the string buf-
- fer ends with "\n;", so the while condition above will have
- evaluated to false. So shared can never be null. Or so you
- might think. Odd syntax errors like s;@{<<; can gobble up
- the implicit semicolon at the end of a flie, causing the
- file handle to be closed even when we are not in a string
- eval. So shared may be null in that case.
- (Closing '}' here to balance the earlier open brace for
- editors that look for matched pairs.) */
- if (UNLIKELY(!shared))
- goto interminable;
- /* A LEXSHARED struct with a null ls_prev pointer is the outer-
- most lexing scope. In a file, shared->ls_linestr at that
- level is just one line, so there is no body to steal. */
- if (infile && !shared->ls_prev) {
- s = olds;
- goto streaming;
- }
- }
+
+ if (PL_lex_inwhat) {
+ /* Look for a newline. If the current buffer does not have one,
+ peek into the line buffer of the parent lexing scope, going
+ up as many levels as necessary to find one with a newline
+ after bufptr.
+ */
+ while (!(s = (char *)memchr(
+ (void *)shared->ls_bufptr, '\n',
+ SvEND(shared->ls_linestr)-shared->ls_bufptr
+ )))
+ {
+ shared = shared->ls_prev;
+ /* shared is only null if we have gone beyond the outermost
+ lexing scope. In a file, we will have broken out of the
+ loop in the previous iteration. In an eval, the string buf-
+ fer ends with "\n;", so the while condition above will have
+ evaluated to false. So shared can never be null. Or so you
+ might think. Odd syntax errors like s;@{<<; can gobble up
+ the implicit semicolon at the end of a flie, causing the
+ file handle to be closed even when we are not in a string
+ eval. So shared may be null in that case.
+ (Closing '>>}' here to balance the earlier open brace for
+ editors that look for matched pairs.) */
+ if (UNLIKELY(!shared))
+ goto interminable;
+ /* A LEXSHARED struct with a null ls_prev pointer is the outer-
+ most lexing scope. In a file, shared->ls_linestr at that
+ level is just one line, so there is no body to steal. */
+ if (infile && !shared->ls_prev) {
+ s = olds;
+ goto streaming;
+ }
+ }
+ }
else { /* eval or we've already hit EOF */
s = (char*)memchr((void*)s, '\n', PL_bufend - s);
if (!s)
goto interminable;
}
+
linestr = shared->ls_linestr;
bufend = SvEND(linestr);
d = s;
if (! SPACE_OR_TAB(*backup)) {
break;
}
-
indent_len++;
}
}
}
}
- } else {
+ }
+ else {
while (s < bufend - len + 1
&& memNE(s,PL_tokenbuf,len) )
{
if (s >= bufend - len + 1) {
goto interminable;
}
+
sv_setpvn(tmpstr,d+1,s-d);
s += len - 1;
/* the preceding stmt passes a newline */
bufend - shared->re_eval_start);
shared->re_eval_start -= s-d;
}
+
if (cxstack_ix >= 0
&& CxTYPE(cx) == CXt_EVAL
&& CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
cx->blk_eval.cur_text = newSVsv(linestr);
cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
}
+
/* Copy everything from s onwards back to d. */
Move(s,d,bufend-s + 1,char);
SvCUR_set(linestr, SvCUR(linestr) - (s-d));
/* Setting PL_bufend only applies when we have not dug deeper
into other scopes, because sublex_done sets PL_bufend to
SvEND(PL_linestr). */
- if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
+ if (shared == PL_parser->lex_shared)
+ PL_bufend = SvEND(linestr);
s = olds;
}
- else
- {
- SV *linestr_save;
- char *oldbufptr_save;
- char *oldoldbufptr_save;
- streaming:
- SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
- term = PL_tokenbuf[1];
- len--;
- linestr_save = PL_linestr; /* must restore this afterwards */
- d = s; /* and this */
- oldbufptr_save = PL_oldbufptr;
- oldoldbufptr_save = PL_oldoldbufptr;
- PL_linestr = newSVpvs("");
- PL_bufend = SvPVX(PL_linestr);
- while (1) {
- PL_bufptr = PL_bufend;
- CopLINE_set(PL_curcop,
- origline + 1 + PL_parser->herelines);
- if (!lex_next_chunk(LEX_NO_TERM)
- && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
- /* Simply freeing linestr_save might seem simpler here, as it
- does not matter what PL_linestr points to, since we are
- about to croak; but in a quote-like op, linestr_save
- will have been prospectively freed already, via
- SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
- restore PL_linestr. */
- SvREFCNT_dec_NN(PL_linestr);
- PL_linestr = linestr_save;
- PL_oldbufptr = oldbufptr_save;
- PL_oldoldbufptr = oldoldbufptr_save;
- goto interminable;
- }
- CopLINE_set(PL_curcop, origline);
- if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
- s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
- /* ^That should be enough to avoid this needing to grow: */
- sv_catpvs(PL_linestr, "\n\0");
- assert(s == SvPVX(PL_linestr));
- PL_bufend = SvEND(PL_linestr);
- }
- s = PL_bufptr;
- PL_parser->herelines++;
- PL_last_lop = PL_last_uni = NULL;
+ else {
+ SV *linestr_save;
+ char *oldbufptr_save;
+ char *oldoldbufptr_save;
+ streaming:
+ SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
+ term = PL_tokenbuf[1];
+ len--;
+ linestr_save = PL_linestr; /* must restore this afterwards */
+ d = s; /* and this */
+ oldbufptr_save = PL_oldbufptr;
+ oldoldbufptr_save = PL_oldoldbufptr;
+ PL_linestr = newSVpvs("");
+ PL_bufend = SvPVX(PL_linestr);
+
+ while (1) {
+ PL_bufptr = PL_bufend;
+ CopLINE_set(PL_curcop,
+ origline + 1 + PL_parser->herelines);
+
+ if ( !lex_next_chunk(LEX_NO_TERM)
+ && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
+ {
+ /* Simply freeing linestr_save might seem simpler here, as it
+ does not matter what PL_linestr points to, since we are
+ about to croak; but in a quote-like op, linestr_save
+ will have been prospectively freed already, via
+ SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
+ restore PL_linestr. */
+ SvREFCNT_dec_NN(PL_linestr);
+ PL_linestr = linestr_save;
+ PL_oldbufptr = oldbufptr_save;
+ PL_oldoldbufptr = oldoldbufptr_save;
+ goto interminable;
+ }
+
+ CopLINE_set(PL_curcop, origline);
+
+ if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
+ s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
+ /* ^That should be enough to avoid this needing to grow: */
+ sv_catpvs(PL_linestr, "\n\0");
+ assert(s == SvPVX(PL_linestr));
+ PL_bufend = SvEND(PL_linestr);
+ }
+
+ s = PL_bufptr;
+ PL_parser->herelines++;
+ PL_last_lop = PL_last_uni = NULL;
+
#ifndef PERL_STRICT_CR
- if (PL_bufend - PL_linestart >= 2) {
- if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
- || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
- {
- PL_bufend[-2] = '\n';
- PL_bufend--;
- SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
- }
- else if (PL_bufend[-1] == '\r')
- PL_bufend[-1] = '\n';
- }
- else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
- PL_bufend[-1] = '\n';
+ if (PL_bufend - PL_linestart >= 2) {
+ if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
+ || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
+ {
+ PL_bufend[-2] = '\n';
+ PL_bufend--;
+ SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
+ }
+ else if (PL_bufend[-1] == '\r')
+ PL_bufend[-1] = '\n';
+ }
+ else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
+ PL_bufend[-1] = '\n';
#endif
- if (indented && (PL_bufend-s) >= len) {
- char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
- if (found) {
- char *backup = found;
- indent_len = 0;
+ if (indented && (PL_bufend-s) >= len) {
+ char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
- /* Only valid if it's preceded by whitespace only */
- while (backup != s && --backup >= s) {
- if (! SPACE_OR_TAB(*backup)) {
- break;
- }
- indent_len++;
- }
+ if (found) {
+ char *backup = found;
+ indent_len = 0;
- /* All whitespace or none! */
- if (backup == found || SPACE_OR_TAB(*backup)) {
- Newx(indent, indent_len + 1, char);
- memcpy(indent, backup, indent_len);
- indent[indent_len] = 0;
- SvREFCNT_dec(PL_linestr);
- PL_linestr = linestr_save;
- PL_linestart = SvPVX(linestr_save);
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- PL_oldbufptr = oldbufptr_save;
- PL_oldoldbufptr = oldoldbufptr_save;
- s = d;
- break;
- }
- }
+ /* Only valid if it's preceded by whitespace only */
+ while (backup != s && --backup >= s) {
+ if (! SPACE_OR_TAB(*backup)) {
+ break;
+ }
+ indent_len++;
+ }
- /* Didn't find it */
- sv_catsv(tmpstr,PL_linestr);
- } else {
- if (*s == term && PL_bufend-s >= len
- && memEQ(s,PL_tokenbuf + 1,len))
- {
- SvREFCNT_dec(PL_linestr);
- PL_linestr = linestr_save;
- PL_linestart = SvPVX(linestr_save);
- PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
- PL_oldbufptr = oldbufptr_save;
- PL_oldoldbufptr = oldoldbufptr_save;
- s = d;
- break;
- } else {
- sv_catsv(tmpstr,PL_linestr);
- }
- }
- }
+ /* All whitespace or none! */
+ if (backup == found || SPACE_OR_TAB(*backup)) {
+ Newx(indent, indent_len + 1, char);
+ memcpy(indent, backup, indent_len);
+ indent[indent_len] = 0;
+ SvREFCNT_dec(PL_linestr);
+ PL_linestr = linestr_save;
+ PL_linestart = SvPVX(linestr_save);
+ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_oldbufptr = oldbufptr_save;
+ PL_oldoldbufptr = oldoldbufptr_save;
+ s = d;
+ break;
+ }
+ }
+
+ /* Didn't find it */
+ sv_catsv(tmpstr,PL_linestr);
+ }
+ else {
+ if (*s == term && PL_bufend-s >= len
+ && memEQ(s,PL_tokenbuf + 1,len))
+ {
+ SvREFCNT_dec(PL_linestr);
+ PL_linestr = linestr_save;
+ PL_linestart = SvPVX(linestr_save);
+ PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_oldbufptr = oldbufptr_save;
+ PL_oldoldbufptr = oldoldbufptr_save;
+ s = d;
+ break;
+ }
+ else {
+ sv_catsv(tmpstr,PL_linestr);
+ }
+ }
+ } /* while (1) */
}
+
PL_multi_end = origline + PL_parser->herelines;
+
if (indented && indent) {
STRLEN linecount = 1;
STRLEN herelen = SvCUR(tmpstr);
linecount++;
/* Found our indentation? Strip it */
- } else if (se - ss >= indent_len
+ }
+ else if (se - ss >= indent_len
&& memEQ(ss, indent, indent_len))
{
STRLEN le = 0;
-
ss += indent_len;
while ((ss + le) < se && *(ss + le) != '\n')
le++;
sv_catpvn(newstr, ss, le);
-
ss += le;
/* Line doesn't begin with our indentation? Croak */
- } else {
+ }
+ else {
+ Safefree(indent);
Perl_croak(aTHX_
"Indentation on line %d of here-doc doesn't match delimiter",
(int)linecount
);
}
- }
+ } /* while */
+
/* avoid sv_setsv() as we dont wan't to COW here */
sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
Safefree(indent);
SvREFCNT_dec_NN(newstr);
}
+
if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
SvPV_shrink_to_cur(tmpstr);
}
+
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
SvUTF8_on(tmpstr);
}
+
PL_lex_stuff = tmpstr;
pl_yylval.ival = op_type;
return s;
interminable:
+ if (indent)
+ Safefree(indent);
SvREFCNT_dec(tmpstr);
CopLINE_set(PL_curcop, origline);
missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
}
+
/* scan_inputsymbol
takes: position of first '<' in input buffer
returns: position of first char following the matching '>' in
char term; /* terminating character */
char *to; /* current position in the sv's data */
I32 brackets = 1; /* bracket nesting level */
- bool has_utf8 = FALSE; /* is there any utf8 content? */
+ bool d_is_utf8 = FALSE; /* is there any utf8 content? */
IV termcode; /* terminating char. code */
U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
STRLEN termlen; /* length of terminating string */
break;
}
}
- else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
- has_utf8 = TRUE;
+ else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
+ d_is_utf8 = TRUE;
}
*to = *s;
break;
else if ((UV)*s == PL_multi_open)
brackets++;
- else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
- has_utf8 = TRUE;
+ else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
+ d_is_utf8 = TRUE;
*to = *s;
}
}
sv_catpvn(sv, s, termlen);
s += termlen;
- if (has_utf8)
+ if (d_is_utf8)
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
}
if (just_zero && (PL_hints & HINT_NEW_INTEGER))
sv = new_constant(start, s - start, "integer",
- sv, NULL, NULL, 0);
+ sv, NULL, NULL, 0, NULL);
else if (PL_hints & HINT_NEW_BINARY)
- sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
+ sv = new_constant(start, s - start, "binary",
+ sv, NULL, NULL, 0, NULL);
}
break;
const char *const key = floatit ? "float" : "integer";
const STRLEN keylen = floatit ? 5 : 7;
sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
- key, keylen, sv, NULL, NULL, 0);
+ key, keylen, sv, NULL, NULL, 0, NULL);
}
break;
if (PL_nexttoke) {
PL_parser->yychar = yylex();
if (PL_parser->yychar == LABEL) {
- char * const lpv = pl_yylval.pval;
- STRLEN llen = strlen(lpv);
+ SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
PL_parser->yychar = YYEMPTY;
- return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
+ cSVOPx(pl_yylval.opval)->op_sv = NULL;
+ op_free(pl_yylval.opval);
+ return labelsv;
} else {
yyunlex();
goto no_label;