#include "EXTERN.h"
#define PERL_IN_TOKE_C
#include "perl.h"
-#include "dquote_static.c"
+#include "dquote_inline.h"
#define new_constant(a,b,c,d,e,f,g) \
S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
* FUN1 : not used, except for not, which isn't a UNIOP
* BOop : bitwise or or xor
* BAop : bitwise and
+ * BCop : bitwise complement
* SHop : shift operator
* PWop : power operator
* PMop : pattern-matching operator
/* Token-returning helper macros.  Each one stores the opcode f in
 * pl_yylval.ival, sets PL_expect (XOPERATOR for FUN1, XTERM for all the
 * others), records the current scan position s in PL_bufptr, and then
 * returns a token code from the enclosing function.  The token code is
 * passed through ao() or REPORT(), both of which are defined elsewhere. */
#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
+#define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
+ REPORT('~')
#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
}
if (name)
Perl_sv_catpv(aTHX_ report, name);
- else if ((char)rv > ' ' && (char)rv <= '~')
+ else if (isGRAPH(rv))
{
Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
if ((char)rv == 'p')
* It prints "Missing operator before end of line" if there's nothing
* after the missing operator, or "... before <...>" if there is something
* after the missing operator.
+ *
+ * PL_bufptr is expected to point to the start of the thing that was found,
+ * and s after the next token or partial token.
*/
STATIC void
will be destroyed and the former value of L</PL_parser> will be restored.
Nothing else need be done to clean up the parsing context.
-The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
+The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
non-null, provides a string (in SV form) containing code to be parsed.
-A copy of the string is made, so subsequent modification of I<line>
-does not affect parsing. I<rsfp>, if non-null, provides an input stream
+A copy of the string is made, so subsequent modification of C<line>
+does not affect parsing. C<rsfp>, if non-null, provides an input stream
from which code will be read to be parsed. If both are non-null, the
-code in I<line> comes first and must consist of complete lines of input,
-and I<rsfp> supplies the remainder of the source.
+code in C<line> comes first and must consist of complete lines of input,
+and C<rsfp> supplies the remainder of the source.
-The I<flags> parameter is reserved for future use. Currently it is only
+The C<flags> parameter is reserved for future use. Currently it is only
used by perl internally, so extensions should always pass zero.
=cut
parser->bufend = parser->bufptr + SvCUR(parser->linestr);
parser->last_lop = parser->last_uni = NULL;
- assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
+ STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
|LEX_DONT_CLOSE_RSFP));
parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
|LEX_DONT_CLOSE_RSFP));
if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
PerlIO_clearerr(parser->rsfp);
- else if (parser->rsfp && (!parser->old_parser ||
- (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
+ else if (parser->rsfp && (!parser->old_parser
+ || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
PerlIO_close(parser->rsfp);
SvREFCNT_dec(parser->rsfp_filters);
SvREFCNT_dec(parser->lex_stuff);
=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
-at least I<len> octets (including terminating C<NUL>). Returns a
+at least C<len> octets (including terminating C<NUL>). Returns a
pointer to the reallocated buffer. This is necessary before making
any direct modification of the buffer that would increase its length.
L</lex_stuff_pvn> provides a more convenient way to insert text into
uses of this facility run the risk of the inserted characters being
interpreted in an unintended manner.
-The string to be inserted is represented by I<len> octets starting
-at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
-according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
+The string to be inserted is represented by C<len> octets starting
+at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
+according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
The characters are recoded for the lexer buffer, according to how the
buffer is currently being interpreted (L</lex_bufutf8>). If a string
to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
}
else {
assert(p < e -1 );
- *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
+ *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
p += 2;
}
}
uses of this facility run the risk of the inserted characters being
interpreted in an unintended manner.
-The string to be inserted is represented by octets starting at I<pv>
+The string to be inserted is represented by octets starting at C<pv>
and continuing to the first nul. These octets are interpreted as either
UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
-in I<flags>. The characters are recoded for the lexer buffer, according
+in C<flags>. The characters are recoded for the lexer buffer, according
to how the buffer is currently being interpreted (L</lex_bufutf8>).
If it is not convenient to nul-terminate a string to be inserted, the
L</lex_stuff_pvn> function is more appropriate.
uses of this facility run the risk of the inserted characters being
interpreted in an unintended manner.
-The string to be inserted is the string value of I<sv>. The characters
+The string to be inserted is the string value of C<sv>. The characters
are recoded for the lexer buffer, according to how the buffer is currently
being interpreted (L</lex_bufutf8>). If a string to be inserted is
not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
=for apidoc Amx|void|lex_unstuff|char *ptr
Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
-I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
+C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
This hides the discarded text from any lexing code that runs later,
as if the text had never appeared.
=for apidoc Amx|void|lex_read_to|char *ptr
Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
-to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
+to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
performing the correct bookkeeping whenever a newline character is passed.
This is the normal way to consume lexed text.
=for apidoc Amx|void|lex_discard_to|char *ptr
Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
-up to I<ptr>. The remaining content of the buffer will be moved, and
-all pointers into the buffer updated appropriately. I<ptr> must not
+up to C<ptr>. The remaining content of the buffer will be moved, and
+all pointers into the buffer updated appropriately. C<ptr> must not
be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
it is not permitted to discard text that has yet to be lexed.
If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
chunk (i.e., the current chunk has been entirely consumed), normally the
current chunk will be discarded at the same time that the new chunk is
-read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
+read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
will not be discarded. If the current chunk has not been entirely
consumed, then it will not be discarded regardless of the flag.
*/
#define LEX_FAKE_EOF 0x80000000
-#define LEX_NO_TERM 0x40000000
+#define LEX_NO_TERM 0x40000000 /* here-doc */
bool
Perl_lex_next_chunk(pTHX_ U32 flags)
bool got_some;
if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
+ if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
+ return FALSE;
linestr = PL_parser->linestr;
buf = SvPVX(linestr);
- if (!(flags & LEX_KEEP_PREVIOUS) &&
- PL_parser->bufptr == PL_parser->bufend) {
+ if (!(flags & LEX_KEEP_PREVIOUS)
+ && PL_parser->bufptr == PL_parser->bufend)
+ {
old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
linestart_pos = 0;
if (PL_parser->last_uni != PL_parser->bufend)
CopLINE_set(PL_curcop, PL_parser->preambling + 1);
PL_parser->preambling = NOLINE;
}
- if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
- PL_curstash != PL_debstash) {
+ if ( got_some_for_debugger
+ && PERLDB_LINE_OR_SAVESRC
+ && PL_curstash != PL_debstash)
+ {
/* debugger active and we're not compiling the debugger code,
* so store the line into the debugger's array of lines
*/
If the next character is in (or extends into) the next chunk of input
text, the next chunk will be read in. Normally the current chunk will be
-discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
-then the current chunk will not be discarded.
+discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
+bit set, then the current chunk will not be discarded.
If the input is being interpreted as UTF-8 and a UTF-8 encoding error
is encountered, an exception is generated.
If the next character is in (or extends into) the next chunk of input
text, the next chunk will be read in. Normally the current chunk will be
-discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
-then the current chunk will not be discarded.
+discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
+bit set, then the current chunk will not be discarded.
If the input is being interpreted as UTF-8 and a UTF-8 encoding error
is encountered, an exception is generated.
If spaces extend into the next chunk of input text, the next chunk will
be read in. Normally the current chunk will be discarded at the same
-time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
+time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
chunk will not be discarded.
=cut
incline(s);
need_incline = 0;
}
+ } else if (!c) {
+ s++;
} else {
break;
}
in_brackets = TRUE;
else if (*p == ']')
in_brackets = FALSE;
- else if ((*p == '@' || *p == '%') &&
- !after_slash &&
- !in_brackets ) {
+ else if ((*p == '@' || *p == '%')
+ && !after_slash
+ && !in_brackets )
+ {
must_be_last = TRUE;
greedy_proto = *p;
}
const char *n;
const char *e;
line_t line_num;
+ UV uv;
PERL_ARGS_ASSERT_INCLINE;
if (*e != '\n' && *e != '\0')
return; /* false alarm */
- line_num = grok_atou(n, &e) - 1;
+ if (!grok_atoUV(n, &uv, &e))
+ return;
+ line_num = ((line_t)uv) - 1;
if (t - s > 0) {
const STRLEN len = t - s;
{
PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
- while (s < PL_bufend && SPACE_OR_TAB(*s))
+ while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
s++;
} else {
STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
PL_bufptr = s;
lex_read_space(flags | LEX_KEEP_PREVIOUS |
- (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
+ (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
LEX_NO_NEXT_CHUNK : 0));
s = PL_bufptr;
PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
PL_last_uni++;
s = PL_last_uni;
while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
- s++;
+ s += UTF ? UTF8SKIP(s) : 1;
if ((t = strchr(s, '(')) && t < PL_bufptr)
return;
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Warning: Use of \"%.*s\" without parentheses is ambiguous",
- (int)(s - PL_last_uni), PL_last_uni);
+ "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous",
+ UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
}
/*
tokereport(type, &NEXTVAL_NEXTTOKE);
}
#endif
+ assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
PL_nexttype[PL_nexttoke] = type;
PL_nexttoke++;
if (PL_lex_state != LEX_KNOWNEXT) {
SV * const sv = newSVpvn_utf8(start, len,
!IN_BYTES
&& UTF
- && !is_ascii_string((const U8*)start, len)
+ && !is_invariant_string((const U8*)start, len)
&& is_utf8_string((const U8*)start, len));
return sv;
}
start = skipspace(start);
s = start;
- if (isIDFIRST_lazy_if(s,UTF) ||
- (allow_pack && *s == ':') )
+ if (isIDFIRST_lazy_if(s,UTF)
+ || (allow_pack && *s == ':') )
{
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
if (check_keyword) {
char *s2 = PL_tokenbuf;
+ STRLEN len2 = len;
if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
- s2 += 6, len -= 6;
- if (keyword(s2, len, 0))
+ s2 += 6, len2 -= 6;
+ if (keyword(s2, len2, 0))
return start;
}
if (token == METHOD) {
s = (char *)scan_version(s, ver, 0);
version = newSVOP(OP_CONST, 0, ver);
}
- else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
- (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
+ else if ((*s != ';' && *s != '{' && *s != '}' )
+ && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
{
PL_bufptr = s;
if (errstr)
return THING;
}
if (op_type == OP_CONST) {
- SV *sv = tokeq(PL_lex_stuff);
+ SV *sv = PL_lex_stuff;
+ PL_lex_stuff = NULL;
+ sv = tokeq(sv);
if (SvTYPE(sv) == SVt_PVIV) {
/* Overloaded constants, nothing fancy: Convert to SVt_PV: */
sv = nsv;
}
pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
- PL_lex_stuff = NULL;
return THING;
}
SAVEI32(PL_lex_casemods);
SAVEI32(PL_lex_starts);
SAVEI8(PL_lex_state);
+ SAVEI8(PL_lex_defer);
SAVESPTR(PL_lex_repl);
SAVEVPTR(PL_lex_inpat);
SAVEI16(PL_lex_inwhat);
PL_lex_stuff = NULL;
PL_sublex_info.repl = NULL;
+ /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
+ set for an inner quote-like operator and then an error causes scope-
+ popping. We must not have a PL_lex_stuff value left dangling, as
+ that breaks assumptions elsewhere. See bug #123617. */
+ SAVEGENERICSV(PL_lex_stuff);
+ SAVEGENERICSV(PL_sublex_info.repl);
+
PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
= SvPVX(PL_linestr);
PL_bufend += SvCUR(PL_linestr);
+ PL_parser->herelines;
PL_parser->herelines = 0;
}
- return ',';
+ return '/';
}
else {
const line_t l = CopLINE(PL_curcop);
PL_bufend = SvPVX(PL_linestr);
PL_bufend += SvCUR(PL_linestr);
PL_expect = XOPERATOR;
- PL_sublex_info.sub_inwhat = 0;
return ')';
}
}
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
+ if (!SvCUR(res))
+ return res;
+
if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
e - backslash_ptr,
&first_bad_char_loc))
/* We deliberately don't try to print the malformed character, which
* might not print very well; it also may be just the first of many
* malformations, so don't print what comes after it */
- yyerror(Perl_form(aTHX_
+ yyerror_pv(Perl_form(aTHX_
"Malformed UTF-8 character immediately after '%.*s'",
- (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
+ (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
+ SVf_UTF8);
return NULL;
}
}
s++;
} else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
- if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
+ if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
goto bad_charname;
}
s += 2;
s++;
}
else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
- if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
+ if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
{
goto bad_charname;
}
\l \L \u \U \Q \E
(?{ or (??{
-
In transliterations:
characters are VERY literal, except for - not at the start or end
of the string, which indicates a range. If the range is in bytes,
example when it is entirely composed
of hex constants */
SV *res; /* result from charnames */
+ STRLEN offset_to_max; /* The offset in the output to where the range
+ high-end character is temporarily placed */
/* Note on sizing: The scanned constant is placed into sv, which is
* initialized by newSV() assuming one byte of output for every byte of
UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
before set */
#ifdef EBCDIC
- UV literal_endpoint = 0;
- bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
+ int backslash_N = 0; /* ? was the character from \N{} */
+ int non_portable_endpoint = 0; /* ? In a range is an endpoint
+ platform-specific like \x65 */
#endif
PERL_ARGS_ASSERT_SCAN_CONST;
ENTER_with_name("scan_const");
SAVEFREESV(sv);
- while (s < send || dorange) {
+ while (s < send
+ || dorange /* Handle tr/// range at right edge of input */
+ ) {
/* get transliterations out of the way (they're most literal) */
if (PL_lex_inwhat == OP_TRANS) {
- /* expand a range A-Z to the full set of characters. AIE! */
- if (dorange) {
- I32 i; /* current expanded character */
- I32 min; /* first character in range */
- I32 max; /* last character in range */
+ /* But there isn't any special handling necessary unless there is a
+ * range, so for most cases we just drop down and handle the value
+ * as any other. There are two exceptions.
+ *
+ * 1. A minus sign indicates that we are actually going to have
+ * a range. In this case, skip the '-', set a flag, then drop
+ * down to handle what should be the end range value.
+ * 2. After we've handled that value, the next time through, that
+ * flag is set and we fix up the range.
+ *
+ * Ranges entirely within Latin1 are expanded out entirely, in
+ * order to avoid the significant overhead of making a swash.
+ * Ranges that extend above Latin1 have to have a swash, so there
+ * is no advantage to expanding them here, so they are stored here
+ * as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte signifies a
+ * hyphen without any possible ambiguity. On EBCDIC machines, if
+ * the range is expressed as Unicode, the Latin1 portion is
+ * expanded out even if the entire range extends above Latin1.
+ * This is because each code point in it has to be processed here
+ * individually to get its native translation */
+
+ if (! dorange) {
+
+ /* Here, we don't think we're in a range. If we've processed
+ * at least one character, then see if this next one is a '-',
+ * indicating the previous one was the start of a range. But
+ * don't bother if we're too close to the end for the minus to
+ * mean that. */
+ if (*s != '-' || s >= send - 1 || s == start) {
+
+ /* A regular character. Process like any other, but first
+ * clear any flags */
+ didrange = FALSE;
+ dorange = FALSE;
#ifdef EBCDIC
- UV uvmax = 0;
+ non_portable_endpoint = 0;
+ backslash_N = 0;
#endif
+ /* Drops down to generic code to process current byte */
+ }
+ else {
+ if (didrange) { /* Something like y/A-C-Z// */
+ Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
+ }
- if (has_utf8
-#ifdef EBCDIC
- && !native_range
-#endif
- ) {
- char * const c = (char*)utf8_hop((U8*)d, -1);
- char *e = d++;
- while (e-- > c)
- *(e + 1) = *e;
- *c = (char) ILLEGAL_UTF8_BYTE;
- /* mark the range as done, and continue */
- dorange = FALSE;
- didrange = TRUE;
- continue;
- }
+ dorange = TRUE;
- i = d - SvPVX_const(sv); /* remember current offset */
-#ifdef EBCDIC
- SvGROW(sv,
- SvLEN(sv) + ((has_utf8)
- ? (512 - UTF_CONTINUATION_MARK
- + UNISKIP(0x100))
- : 256));
- /* How many two-byte within 0..255: 128 in UTF-8,
- * 96 in UTF-8-mod. */
+ s++; /* Skip past the minus */
+
+ /* d now points to where the end-range character will be
+ * placed. Save it so we won't have to go finding it later,
+ * and drop down to get that character. (Actually we
+ * instead save the offset, to handle the case where a
+ * realloc in the meantime could change the actual
+ * pointer). We'll finish processing the range the next
+ * time through the loop */
+ offset_to_max = d - SvPVX_const(sv);
+ }
+ } /* End of not a range */
+ else {
+ /* Here we have parsed a range. Now must handle it. At this
+ * point:
+ * 'sv' is a SV* that contains the output string we are
+ * constructing. The final two characters in that string
+ * are the range start and range end, in order.
+ * 'd' points to just beyond the range end in the 'sv' string,
+ * where we would next place something
+ * 'offset_to_max' is the offset in 'sv' at which the character
+ * before 'd' begins.
+ */
+ const char * max_ptr = SvPVX_const(sv) + offset_to_max;
+ const char * min_ptr;
+ IV range_min;
+ IV range_max; /* last character in range */
+ STRLEN save_offset;
+ STRLEN grow;
+#ifndef EBCDIC /* Not meaningful except in EBCDIC, so initialize to false */
+ const bool convert_unicode = FALSE;
+ const IV real_range_max = 0;
#else
- SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
+ bool convert_unicode;
+ IV real_range_max = 0;
#endif
- d = SvPVX(sv) + i; /* refresh d after realloc */
-#ifdef EBCDIC
+
+ /* Get the range-ends code point values. */
if (has_utf8) {
- int j;
- for (j = 0; j <= 1; j++) {
- char * const c = (char*)utf8_hop((U8*)d, -1);
- const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
- if (j)
- min = (U8)uv;
- else if (uv < 256)
- max = (U8)uv;
- else {
- max = (U8)0xff; /* only to \xff */
- uvmax = uv; /* \x{100} to uvmax */
- }
- d = c; /* eat endpoint chars */
- }
+ /* We know the utf8 is valid, because we just constructed
+ * it ourselves in previous loop iterations */
+ min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
+ range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
+ range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
}
- else {
-#endif
- d -= 2; /* eat the first char and the - */
- min = (U8)*d; /* first char in range */
- max = (U8)d[1]; /* last char in range */
+ else {
+ min_ptr = max_ptr - 1;
+ range_min = * (U8*) min_ptr;
+ range_max = * (U8*) max_ptr;
+ }
+
#ifdef EBCDIC
- }
+ /* On EBCDIC platforms, we may have to deal with portable
+ * ranges. These happen if at least one range endpoint is a
+ * Unicode value (\N{...}), or if the range is a subset of
+ * [A-Z] or [a-z], and both ends are literal characters,
+ * like 'A', and not like \x{C1} */
+ if ((convert_unicode
+ = cBOOL(backslash_N) /* \N{} forces Unicode, hence
+ portable range */
+ || ( ! non_portable_endpoint
+ && (( isLOWER_A(range_min) && isLOWER_A(range_max))
+ || (isUPPER_A(range_min) && isUPPER_A(range_max))))
+ )) {
+
+ /* Special handling is needed for these portable ranges.
+ * They are defined to all be in Unicode terms, which
+ * include all Unicode code points between the end points.
+ * Convert to Unicode to get the Unicode range. Later we
+ * will convert each code point in the range back to
+ * native. */
+ range_min = NATIVE_TO_UNI(range_min);
+ range_max = NATIVE_TO_UNI(range_max);
+ }
#endif
- if (min > max) {
- Perl_croak(aTHX_
- "Invalid range \"%c-%c\" in transliteration operator",
- (char)min, (char)max);
+ if (range_min > range_max) {
+ if (convert_unicode) {
+ /* Need to convert back to native for meaningful
+ * messages for this platform */
+ range_min = UNI_TO_NATIVE(range_min);
+ range_max = UNI_TO_NATIVE(range_max);
+ }
+
+ /* Use the characters themselves for the error message if
+ * ASCII printables; otherwise some visible representation
+ * of them */
+ if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
+ Perl_croak(aTHX_
+ "Invalid range \"%c-%c\" in transliteration operator",
+ (char)range_min, (char)range_max);
+ }
+ else if (convert_unicode) {
+ /* diag_listed_as: Invalid range "%s" in transliteration operator */
+ Perl_croak(aTHX_
+ "Invalid range \"\\N{U+%04"UVXf"}-\\N{U+%04"UVXf"}\""
+ " in transliteration operator",
+ range_min, range_max);
+ }
+ else {
+ /* diag_listed_as: Invalid range "%s" in transliteration operator */
+ Perl_croak(aTHX_
+ "Invalid range \"\\x{%04"UVXf"}-\\x{%04"UVXf"}\""
+ " in transliteration operator",
+ range_min, range_max);
+ }
}
+ if (has_utf8) {
+
+ /* We try to avoid creating a swash. If the upper end of
+ * this range is below 256, this range won't force a swash;
+ * otherwise it does force a swash, and as long as we have
+ * to have one, we might as well not expand things out.
+ * But if it's EBCDIC, we may have to look at each
+ * character below 256 if we have to convert to/from
+ * Unicode values */
+ if (range_max > 255
#ifdef EBCDIC
- /* Because of the discontinuities in EBCDIC A-Z and a-z, expand
- * any subsets of these ranges into individual characters */
- if (literal_endpoint == 2 &&
- ((isLOWER_A(min) && isLOWER_A(max)) ||
- (isUPPER_A(min) && isUPPER_A(max))))
- {
- for (i = min; i <= max; i++) {
- if (isALPHA_A(i))
- *d++ = i;
- }
- }
- else
+ && (range_min > 255 || ! convert_unicode)
#endif
- for (i = min; i <= max; i++)
-#ifdef EBCDIC
- if (has_utf8) {
- append_utf8_from_native_byte(i, &d);
+ ) {
+ /* Move the high character one byte to the right; then
+ * insert between it and the range begin, an illegal
+ * byte which serves to indicate this is a range (using
+ * a '-' could be ambiguous). */
+ char *e = d++;
+ while (e-- > max_ptr) {
+ *(e + 1) = *e;
}
- else
-#endif
- *d++ = (char)i;
-
+ *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
+ goto range_done;
+ }
+
+ /* Here, we're going to expand out the range. For EBCDIC
+ * the range can extend above 255 (not so in ASCII), so
+ * for EBCDIC, split it into the parts above and below
+ * 255/256 */
#ifdef EBCDIC
- if (uvmax) {
- d = (char*)uvchr_to_utf8((U8*)d, 0x100);
- if (uvmax > 0x101)
- *d++ = (char) ILLEGAL_UTF8_BYTE;
- if (uvmax > 0x100)
- d = (char*)uvchr_to_utf8((U8*)d, uvmax);
- }
+ if (range_max > 255) {
+ real_range_max = range_max;
+ range_max = 255;
+ }
#endif
+ }
- /* mark the range as done, and continue */
- dorange = FALSE;
- didrange = TRUE;
+ /* Here we need to expand out the string to contain each
+ * character in the range. Grow the output to handle this */
+
+ save_offset = min_ptr - SvPVX_const(sv);
+
+ /* The base growth is the number of code points in the range */
+ grow = range_max - range_min + 1;
+ if (has_utf8) {
+
+ /* But if the output is UTF-8, some of those characters may
+ * need two bytes (since the maximum range value here is
+ * 255, the max bytes per character is two). On ASCII
+ * platforms, it's not much trouble to get an accurate
+ * count of what's needed. But on EBCDIC, the ones that
+ * need 2 bytes are scattered around, so just use a worst
+ * case value instead of calculating for that platform. */
#ifdef EBCDIC
- literal_endpoint = 0;
+ grow *= 2;
+#else
+ /* Only those above 127 require 2 bytes. This may be
+ * everything in the range, or not */
+ if (range_min > 127) {
+ grow *= 2;
+ }
+ else if (range_max > 127) {
+ grow += range_max - 127;
+ }
#endif
- continue;
- }
+ }
+
+ /* Subtract 3 for the bytes that were already accounted for
+ * (min, max, and the hyphen) */
+ SvGROW(sv, SvLEN(sv) + grow - 3);
+ d = SvPVX(sv) + save_offset; /* refresh d after realloc */
+
+ /* Here, we expand out the range. On ASCII platforms, the
+ * compiler should optimize out the 'convert_unicode==TRUE'
+ * portion of this */
+ if (convert_unicode) {
+ IV i;
- /* range begins (ignore - as first or last char) */
- else if (*s == '-' && s+1 < send && s != start) {
- if (didrange) {
- Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
+ /* Recall that the min and max are now in Unicode terms, so
+ * we have to convert each character to its native
+ * equivalent */
+ if (has_utf8) {
+ for (i = range_min; i <= range_max; i++) {
+ append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
+ (U8 **) &d);
+ }
+ }
+ else {
+ for (i = range_min; i <= range_max; i++) {
+ *d++ = (char)LATIN1_TO_NATIVE((U8) i);
+ }
+ }
}
- if (has_utf8
-#ifdef EBCDIC
- && !native_range
-#endif
- ) {
- *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
- s++;
- continue;
+ else {
+ IV i;
+
+ /* Here, no conversions are necessary, which means that the
+ * first character in the range is already in 'd' and
+ * valid, so we can skip overwriting it */
+ if (has_utf8) {
+ d += UTF8SKIP(d);
+ for (i = range_min + 1; i <= range_max; i++) {
+ append_utf8_from_native_byte((U8) i, (U8 **) &d);
+ }
+ }
+ else {
+ d++;
+ for (i = range_min + 1; i <= range_max; i++) {
+ *d++ = (char)i;
+ }
+ }
}
- dorange = TRUE;
- s++;
- }
- else {
- didrange = FALSE;
-#ifdef EBCDIC
- literal_endpoint = 0;
- native_range = TRUE;
-#endif
- }
- }
- /* if we get here, we're not doing a transliteration */
+ /* (Compilers should optimize this out for non-EBCDIC). If the
+ * original range extended above 255, add in that portion */
+ if (real_range_max) {
+ *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
+ *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
+ if (real_range_max > 0x101)
+ *d++ = (char) ILLEGAL_UTF8_BYTE;
+ if (real_range_max > 0x100)
+ d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
+ }
+ range_done:
+ /* mark the range as done, and continue */
+ didrange = TRUE;
+ dorange = FALSE;
+#ifdef EBCDIC
+ non_portable_endpoint = 0;
+ backslash_N = 0;
+#endif
+ continue;
+ } /* End of is a range */
+ } /* End of transliteration. Joins main code after these else's */
else if (*s == '[' && PL_lex_inpat && !in_charclass) {
char *s1 = s-1;
int esc = 0;
while (s+1 < send && *s != ')')
*d++ = *s++;
}
- else if (!PL_lex_casemods &&
- ( s[2] == '{' /* This should match regcomp.c */
- || (s[2] == '?' && s[3] == '{')))
+ else if (!PL_lex_casemods
+ && ( s[2] == '{' /* This should match regcomp.c */
+ || (s[2] == '?' && s[3] == '{')))
{
break;
}
}
/* likewise skip #-initiated comments in //x patterns */
- else if (*s == '#' && PL_lex_inpat && !in_charclass &&
- ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
+ else if (*s == '#'
+ && PL_lex_inpat
+ && !in_charclass
+ && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
+ {
while (s+1 < send && *s != '\n')
*d++ = *s++;
}
(@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
*/
else if (*s == '@' && s[1]) {
- if (isWORDCHAR_lazy_if(s+1,UTF))
+ if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
break;
if (strchr(":'{$", s[1]))
break;
/* warn on \1 - \9 in substitution replacements, but note that \11
* is an octal; and \19 is \1 followed by '9' */
- if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
- isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
+ if (PL_lex_inwhat == OP_SUBST
+ && !PL_lex_inpat
+ && isDIGIT(*s)
+ && *s != '0'
+ && !isDIGIT(s[1]))
{
/* diag_listed_as: \%d better written as $%d */
Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
}
switch (*s) {
-
- /* quoted - in transliterations */
- case '-':
- if (PL_lex_inwhat == OP_TRANS) {
- *d++ = *s++;
- continue;
- }
- /* FALLTHROUGH */
default:
{
if ((isALPHANUMERIC(*s)))
* to recode the rest of the string into utf8 */
/* Here uv is the ordinal of the next character being added */
- if (!UVCHR_IS_INVARIANT(uv)) {
+ if (UVCHR_IS_INVARIANT(uv)) {
+ *d++ = (char) uv;
+ }
+ else {
if (!has_utf8 && uv > 255) {
/* Might need to recode whatever we have accumulated so
* far if it contains any chars variant in utf8 or
SvPOK_on(sv);
*d = '\0';
/* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- UNISKIP(uv) + (STRLEN)(send - s) + 1);
+ sv_utf8_upgrade_flags_grow(
+ sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
+ /* Above-latin1 in string
+ * implies no encoding */
+ |SV_UTF8_NO_ENCODING,
+ UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1);
d = SvPVX(sv) + SvCUR(sv);
has_utf8 = TRUE;
}
if (has_utf8) {
d = (char*)uvchr_to_utf8((U8*)d, uv);
- if (PL_lex_inwhat == OP_TRANS &&
- PL_sublex_info.sub_op) {
+ if (PL_lex_inwhat == OP_TRANS
+ && PL_sublex_info.sub_op)
+ {
PL_sublex_info.sub_op->op_private |=
(PL_lex_repl ? OPpTRANS_FROM_UTF
: OPpTRANS_TO_UTF);
}
-#ifdef EBCDIC
- if (uv > 255 && !dorange)
- native_range = FALSE;
-#endif
}
else {
*d++ = (char)uv;
}
}
- else {
- *d++ = (char) uv;
- }
+#ifdef EBCDIC
+ non_portable_endpoint++;
+#endif
continue;
case 'N':
/* In a non-pattern \N must be like \N{U+0041}, or it can be a
* named character, like \N{LATIN SMALL LETTER A}, or a named
* sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
- * GRAVE}. For convenience all three forms are referred to as
- * "named characters" below.
+ * GRAVE} (except y/// can't handle the latter, croaking). For
+ * convenience all three forms are referred to as "named
+ * characters" below.
*
* For patterns, \N also can mean to match a non-newline. Code
* before this 'switch' statement should already have handled
*
* The structure of this section of code (besides checking for
* errors and upgrading to utf8) is:
- * If the named character is of the form \N{U+...}, pass it
+ * If the named character is of the form \N{U+...}, pass it
* through if a pattern; otherwise convert the code point
* to utf8
- * Otherwise must be some \N{NAME}: convert to \N{U+c1.c2...}
- * if a pattern; otherwise convert to utf8
+ * Otherwise must be some \N{NAME}: convert to
+ * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
*
- * If the regex compiler should ever need to differentiate
- * between the \N{U+...} and \N{name} forms, that could easily
- * be done here by stripping any leading zeros from the
- * \N{U+...} case, and adding them to the other one. */
-
- /* Here, 's' points to the 'N'; the test below is guaranteed to
+ * Transliteration is an exception. The conversion to utf8 is
+ * only done if the code point requires it to be representable.
+ *
+ * Here, 's' points to the 'N'; the test below is guaranteed to
* succeed if we are being called on a pattern, as we already
* know from a test above that the next character is a '{'. A
* non-pattern \N must mean 'named character', which requires
/* Here it looks like a named character */
if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
- I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
- | PERL_SCAN_DISALLOW_PREFIX;
- STRLEN len;
-
s += 2; /* Skip to next char after the 'U+' */
- len = e - s;
- uv = grok_hex(s, &len, &flags, NULL);
- if (len == 0 || len != (STRLEN)(e - s)) {
- yyerror("Invalid hexadecimal number in \\N{U+...}");
- s = e + 1;
- continue;
- }
-
if (PL_lex_inpat) {
- s -= 5; /* Include the '\N{U+' */
-#ifdef EBCDIC
- /* On EBCDIC platforms, in \N{U+...}, the '...' is a
- * Unicode value, so convert to native so downstream
- * code can continue to assume it's native */
- d += my_snprintf(d, e - s + 1 + 1, /* includes the '}'
- and the \0 */
- "\\N{U+%X}",
- (unsigned int) UNI_TO_NATIVE(uv));
-#else
- /* On non-EBCDIC platforms, pass it through unchanged.
- * The reason we evaluated the number above is to make
- * sure there wasn't a syntax error. */
- Copy(s, d, e - s + 1, char); /* +1 is for the '}' */
- d += e - s + 1;
-#endif
+
+ /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
+ /* Check the syntax. */
+ const char *orig_s;
+ orig_s = s - 5;
+ if (!isXDIGIT(*s)) {
+ bad_NU:
+ yyerror(
+ "Invalid hexadecimal number in \\N{U+...}"
+ );
+ s = e + 1;
+ continue;
+ }
+ while (++s < e) {
+ if (isXDIGIT(*s))
+ continue;
+ else if ((*s == '.' || *s == '_')
+ && isXDIGIT(s[1]))
+ continue;
+ goto bad_NU;
+ }
+
+ /* Pass everything through unchanged.
+ * +1 is for the '}' */
+ Copy(orig_s, d, e - orig_s + 1, char);
+ d += e - orig_s + 1;
}
else { /* Not a pattern: convert the hex to string */
-
- /* If the destination is not in utf8, unconditionally
- * recode it to be so. This is because \N{} implies
- * Unicode semantics, and scalars have to be in utf8
- * to guarantee those semantics */
- if (! has_utf8) {
+ I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+ | PERL_SCAN_SILENT_ILLDIGIT
+ | PERL_SCAN_DISALLOW_PREFIX;
+ STRLEN len = e - s;
+ uv = grok_hex(s, &len, &flags, NULL);
+ if (len == 0 || (len != (STRLEN)(e - s)))
+ goto bad_NU;
+
+ /* For non-tr///, if the destination is not in utf8,
+ * unconditionally recode it to be so. This is
+ * because \N{} implies Unicode semantics, and scalars
+ * have to be in utf8 to guarantee those semantics.
+ * tr/// doesn't care about Unicode rules, so no need
+ * there to upgrade to UTF-8 for small enough code
+ * points */
+ if (! has_utf8 && ( uv > 0xFF
+ || PL_lex_inwhat != OP_TRANS))
+ {
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
/* See Note on sizing above. */
sv_utf8_upgrade_flags_grow(
- sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- UNISKIP(uv) + (STRLEN)(send - e) + 1);
+ sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+ UVCHR_SKIP(uv) + (STRLEN)(send - e) + 1);
d = SvPVX(sv) + SvCUR(sv);
has_utf8 = TRUE;
}
/* Add the (Unicode) code point to the output. */
- if (UNI_IS_INVARIANT(uv)) {
+ if (OFFUNI_IS_INVARIANT(uv)) {
*d++ = (char) LATIN1_TO_NATIVE(uv);
}
else {
char hex_string[4];
int len =
my_snprintf(hex_string,
- sizeof(hex_string),
- "%02X.", (U8) *str);
- PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(hex_string));
+ sizeof(hex_string),
+ "%02X.",
+
+ /* The regex compiler is
+ * expecting Unicode, not
+ * native */
+ (U8) NATIVE_TO_LATIN1(*str));
+ PERL_MY_SNPRINTF_POST_GUARD(len,
+ sizeof(hex_string));
Copy(hex_string, d, 3, char);
d += 3;
str++;
len,
&char_length,
UTF8_ALLOW_ANYUV);
- /* Convert first code point to hex, including
- * the boiler plate before it. */
+ /* Convert first code point to Unicode hex,
+ * including the boiler plate before it. */
output_length =
my_snprintf(hex_string, sizeof(hex_string),
- "\\N{U+%X",
- (unsigned int) uv);
+ "\\N{U+%X",
+ (unsigned int) NATIVE_TO_UNI(uv));
/* Make sure there is enough space to hold it */
d = off + SvGROW(sv, off
d += output_length;
/* For each subsequent character, append dot and
- * its ordinal in hex */
+ * its Unicode code point in hex */
while ((str += char_length) < str_end) {
const STRLEN off = d - SvPVX_const(sv);
U32 uv = utf8n_to_uvchr((U8 *) str,
UTF8_ALLOW_ANYUV);
output_length =
my_snprintf(hex_string,
- sizeof(hex_string),
- ".%X",
- (unsigned int) uv);
+ sizeof(hex_string),
+ ".%X",
+ (unsigned int) NATIVE_TO_UNI(uv));
d = off + SvGROW(sv, off
+ output_length
else { /* Here, not in a pattern. Convert the name to a
* string. */
- /* If destination is not in utf8, unconditionally
- * recode it to be so. This is because \N{} implies
- * Unicode semantics, and scalars have to be in utf8
- * to guarantee those semantics */
- if (! has_utf8) {
+ if (PL_lex_inwhat == OP_TRANS) {
+ str = SvPV_const(res, len);
+ if (len > ((SvUTF8(res))
+ ? UTF8SKIP(str)
+ : 1U))
+ {
+ yyerror(Perl_form(aTHX_
+ "%.*s must not be a named sequence"
+ " in transliteration operator",
+ /* +1 to include the "}" */
+ (int) (e + 1 - start), start));
+ goto end_backslash_N;
+ }
+ }
+ else if (! SvUTF8(res)) {
+ /* Make sure \N{} return is UTF-8. This is because
+ * \N{} implies Unicode semantics, and scalars have to
+ * be in utf8 to guarantee those semantics; but not
+ * needed in tr/// */
+ sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
+ str = SvPV_const(res, len);
+ }
+
+ /* Upgrade destination to be utf8 if this new
+ * component is */
+ if (! has_utf8 && SvUTF8(res)) {
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
*d = '\0';
const STRLEN off = d - SvPVX_const(sv);
d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
}
- if (! SvUTF8(res)) { /* Make sure is \N{} return is UTF-8 */
- sv_utf8_upgrade(res);
- str = SvPV_const(res, len);
- }
Copy(str, d, len, char);
d += len;
}
SvREFCNT_dec(res);
} /* End \N{NAME} */
+
+ end_backslash_N:
#ifdef EBCDIC
- if (!dorange)
- native_range = FALSE; /* \N{} is defined to be Unicode */
+ backslash_N++; /* \N{} is defined to be Unicode */
#endif
s = e + 1; /* Point to just after the '}' */
continue;
else {
yyerror("Missing control char name in \\c");
}
+#ifdef EBCDIC
+ non_portable_endpoint++;
+#endif
continue;
/* printf-style backslashes, formfeeds, newlines, etc */
s++;
continue;
} /* end if (backslash) */
-#ifdef EBCDIC
- else
- literal_endpoint++;
-#endif
default_action:
/* If we started with encoded form, or already know we want it,
if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
STRLEN len = 1;
-
/* One might think that it is wasted effort in the case of the
* source being utf8 (this_utf8 == TRUE) to take the next character
* in the source, convert it to an unsigned value, and then convert
const UV nextuv = (this_utf8)
? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
: (UV) ((U8) *s);
- const STRLEN need = UNISKIP(nextuv);
+ const STRLEN need = UVCHR_SKIP(nextuv);
if (!has_utf8) {
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
s += len;
d = (char*)uvchr_to_utf8((U8*)d, nextuv);
-#ifdef EBCDIC
- if (uv > 255 && !dorange)
- native_range = FALSE;
-#endif
}
else {
*d++ = *s++;
" >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
SvPOK_on(sv);
- if (PL_encoding && !has_utf8) {
- sv_recode_to_utf8(sv, PL_encoding);
+ if (IN_ENCODING && !has_utf8) {
+ sv_recode_to_utf8(sv, _get_encoding());
if (SvUTF8(sv))
has_utf8 = TRUE;
}
else
weight -= 10;
}
- else if (*s == '$' && s[1] &&
- strchr("[#!%*<>()-=",s[1])) {
+ else if (*s == '$'
+ && s[1]
+ && strchr("[#!%*<>()-=",s[1]))
+ {
if (/*{*/ strchr("])} =",s[2]))
weight -= 10;
else
&& !(last_un_char == '$' || last_un_char == '@'
|| last_un_char == '&')
&& isALPHA(*s) && s[1] && isALPHA(s[1])) {
- char *d = tmpbuf;
+ char *d = s;
while (isALPHA(*s))
- *d++ = *s++;
- *d = '\0';
- if (keyword(tmpbuf, d - tmpbuf, 0))
+ s++;
+ if (keyword(d, s - d, 0))
weight -= 150;
}
if (un_char == last_un_char + 1)
}
if (*start == '$') {
- if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
- isUPPER(*PL_tokenbuf))
+ if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
+ || isUPPER(*PL_tokenbuf))
return 0;
s = skipspace(s);
PL_bufptr = start;
if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
return PL_curstash;
- if (len > 2 &&
- (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
- (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
+ if (len > 2
+ && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
+ && (gv = gv_fetchpvn_flags(pkgname,
+ len,
+ ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
{
return GvHV(gv); /* Foo:: */
}
STATIC bool
S_word_takes_any_delimeter(char *p, STRLEN len)
{
- return (len == 1 && strchr("msyq", p[0])) ||
- (len == 2 && (
- (p[0] == 't' && p[1] == 'r') ||
- (p[0] == 'q' && strchr("qwxr", p[1]))));
+ return (len == 1 && strchr("msyq", p[0]))
+ || (len == 2
+ && ((p[0] == 't' && p[1] == 'r')
+ || (p[0] == 'q' && strchr("qwxr", p[1]))));
}
static void
SvREFCNT_dec(tmp);
} );
- switch (PL_lex_state) {
- case LEX_NORMAL:
- case LEX_INTERPNORMAL:
- break;
-
/* when we've already built the next token, just pull it out of the queue */
- case LEX_KNOWNEXT:
+ if (PL_nexttoke) {
PL_nexttoke--;
pl_yylval = PL_nextval[PL_nexttoke];
if (!PL_nexttoke) {
}
return REPORT(next_type == 'p' ? pending_ident() : next_type);
}
+ }
+
+ switch (PL_lex_state) {
+ case LEX_NORMAL:
+ case LEX_INTERPNORMAL:
+ break;
/* interpolated case modifiers like \L \U, including \Q and \E.
when we get here, PL_bufptr is at the \
I32 tmp;
if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
- if ((*s == 'L' || *s == 'U' || *s == 'F') &&
- (strchr(PL_lex_casestack, 'L')
+ if ((*s == 'L' || *s == 'U' || *s == 'F')
+ && (strchr(PL_lex_casestack, 'L')
|| strchr(PL_lex_casestack, 'U')
- || strchr(PL_lex_casestack, 'F'))) {
+ || strchr(PL_lex_casestack, 'F')))
+ {
PL_lex_casestack[--PL_lex_casemods] = '\0';
PL_lex_allbrackets--;
return REPORT(')');
/* FALLTHROUGH */
case LEX_INTERPEND:
+ /* Treat state as LEX_NORMAL if we have no inner lexing scope.
+ XXX This hack can be removed if we stop setting PL_lex_state to
+ LEX_KNOWNEXT, as can the hack under LEX_INTERPCONCAT below. */
+ if (UNLIKELY(!PL_lex_inwhat)) {
+ PL_lex_state = LEX_NORMAL;
+ break;
+ }
+
if (PL_lex_dojoin) {
const U8 dojoin_was = PL_lex_dojoin;
PL_lex_dojoin = FALSE;
Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
(long) PL_lex_brackets);
#endif
+ /* Treat state as LEX_NORMAL when not in an inner lexing scope.
+ XXX This hack can be removed if we stop setting PL_lex_state to
+ LEX_KNOWNEXT. */
+ if (UNLIKELY(!PL_lex_inwhat)) {
+ PL_lex_state = LEX_NORMAL;
+ break;
+ }
+
if (PL_bufptr == PL_bufend)
return REPORT(sublex_done());
case 26:
goto fake_eof; /* emulate EOF on ^D or ^Z */
case 0:
- if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
+ if ((!PL_rsfp || PL_lex_inwhat)
+ && (!PL_parser->filtered || s+1 < PL_bufend)) {
PL_last_uni = 0;
PL_last_lop = 0;
- if (PL_lex_brackets &&
- PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
+ if (PL_lex_brackets
+ && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
+ {
yyerror((const char *)
(PL_lex_formbrack
? "Format not terminated"
PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
- if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
+ if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
update_debugger_info(PL_linestr, NULL, 0);
goto retry;
}
s = PL_bufptr;
/* If it looks like the start of a BOM or raw UTF-16,
* check if it in fact is. */
- if (bof && PL_rsfp &&
- (*s == 0 ||
- *(U8*)s == BOM_UTF8_FIRST_BYTE ||
- *(U8*)s >= 0xFE ||
- s[1] == 0)) {
+ if (bof && PL_rsfp
+ && (*s == 0
+ || *(U8*)s == BOM_UTF8_FIRST_BYTE
+ || *(U8*)s >= 0xFE
+ || s[1] == 0))
+ {
Off_t offset = (IV)PerlIO_tell(PL_rsfp);
bof = (offset == (Off_t)SvCUR(PL_linestr));
#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
d = instr(s,"perl -");
if (!d) {
d = instr(s,"perl");
+ if (d && d[4] == '6')
+ d = NULL;
#if defined(DOSISH)
/* avoid getting into infinite loops when shebang
* line contains "Perl" rather than "perl" */
*s = '#'; /* Don't try to parse shebang line */
}
#endif /* ALTERNATE_SHEBANG */
- if (!d &&
- *s == '#' &&
- ipathend > ipath &&
- !PL_minus_c &&
- !instr(s,"indir") &&
- instr(PL_origargv[0],"perl"))
+ if (!d
+ && *s == '#'
+ && ipathend > ipath
+ && !PL_minus_c
+ && !instr(s,"indir")
+ && instr(PL_origargv[0],"perl"))
{
dVAR;
char **newargv;
} while (argc && argv[0][0] == '-' && argv[0][1]);
init_argv_symbols(argc,argv);
}
- if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
- ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
+ if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
+ || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
/* if we have already added "LINE: while (<>) {",
we must not do it again */
{
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
PL_last_lop = PL_last_uni = NULL;
PL_preambled = FALSE;
- if (PERLDB_LINE || PERLDB_SAVESRC)
+ if (PERLDB_LINE_OR_SAVESRC)
(void)gv_fetchfile(PL_origfilename);
goto retry;
}
}
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
PL_lex_state = LEX_FORMLINE;
- NEXTVAL_NEXTTOKE.ival = 0;
force_next(FORMRBRACK);
TOKEN(';');
}
Perl_croak(aTHX_
"\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
#endif
- case ' ': case '\t': case '\f': case 013:
+ case ' ': case '\t': case '\f': case '\v':
s++;
goto retry;
case '#':
case '\n':
- if (PL_lex_state != LEX_NORMAL ||
- (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
+ if (PL_lex_state != LEX_NORMAL
+ || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
+ {
const bool in_comment = *s == '#';
if (*s == '#' && s == PL_linestart && PL_in_eval
&& !PL_rsfp && !PL_parser->filtered) {
incline(s);
if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
PL_lex_state = LEX_FORMLINE;
- NEXTVAL_NEXTTOKE.ival = 0;
force_next(FORMRBRACK);
TOKEN(';');
}
else if (*s == '>') {
s++;
s = skipspace(s);
- if (FEATURE_POSTDEREF_IS_ENABLED && (
- ((*s == '$' || *s == '&') && s[1] == '*')
+ if (((*s == '$' || *s == '&') && s[1] == '*')
||(*s == '$' && s[1] == '#' && s[2] == '*')
||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
||(*s == '*' && (s[1] == '*' || s[1] == '{'))
- ))
+ )
{
- Perl_ck_warner_d(aTHX_
- packWARN(WARN_EXPERIMENTAL__POSTDEREF),
- "Postfix dereference is experimental"
- );
PL_expect = XPOSTDEREF;
TOKEN(ARROW);
}
TERM(ARROW);
}
if (PL_expect == XOPERATOR) {
- if (*s == '=' && !PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+ if (*s == '='
+ && !PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+ {
s--;
TOKEN(0);
}
OPERATOR(PREINC);
}
if (PL_expect == XOPERATOR) {
- if (*s == '=' && !PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+ if (*s == '='
+ && !PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+ {
s--;
TOKEN(0);
}
s++;
if (*s == '*') {
s++;
- if (*s == '=' && !PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+ if (*s == '=' && !PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+ {
s -= 2;
TOKEN(0);
}
PWop(OP_POW);
}
- if (*s == '=' && !PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+ if (*s == '='
+ && !PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+ {
s--;
TOKEN(0);
}
case '%':
{
if (PL_expect == XOPERATOR) {
- if (s[1] == '=' && !PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+ if (s[1] == '='
+ && !PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+ {
TOKEN(0);
+ }
++s;
PL_parser->saw_infix_sigil = 1;
Mop(OP_MODULO);
TERM('%');
}
case '^':
+ d = s;
+ bof = FEATURE_BITWISE_IS_ENABLED;
+ if (bof && s[1] == '.')
+ s++;
if (!PL_lex_allbrackets && PL_lex_fakeeof >=
(s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
+ {
+ s = d;
TOKEN(0);
+ }
s++;
- BOop(OP_BIT_XOR);
+ BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
case '[':
if (PL_lex_brackets > 100)
Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
Eop(OP_SMARTMATCH);
}
s++;
- OPERATOR('~');
+ if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
+ s++;
+ BCop(OP_SCOMPLEMENT);
+ }
+ BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
case ',':
if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
TOKEN(0);
sv_catsv(sv, PL_lex_stuff);
attrs = op_append_elem(OP_LIST, attrs,
newSVOP(OP_CONST, 0, sv));
- SvREFCNT_dec(PL_lex_stuff);
+ SvREFCNT_dec_NN(PL_lex_stuff);
PL_lex_stuff = NULL;
}
else {
sv_free(sv);
CvMETHOD_on(PL_compcv);
}
+ else if (!PL_in_my && len == 5
+ && strnEQ(SvPVX(sv), "const", len))
+ {
+ sv_free(sv);
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
+ ":const is experimental"
+ );
+ CvANONCONST_on(PL_compcv);
+ if (!CvANON(PL_compcv))
+ yyerror(":const is not permitted on named "
+ "subroutines");
+ }
/* After we've set the flags, it could be argued that
we don't need to do the attributes.pm-based setting
process, and shouldn't bother appending recognized
/* XXX losing whitespace on sequential attributes here */
}
{
- if (*s != ';' && *s != '}' &&
- !(PL_expect == XOPERATOR
- ? (*s == '=' || *s == ')')
- : (*s == '{' || *s == '('))) {
+ if (*s != ';'
+ && *s != '}'
+ && !(PL_expect == XOPERATOR
+ ? (*s == '=' || *s == ')')
+ : (*s == '{' || *s == '(')))
+ {
const char q = ((*s == '\'') ? '"' : '\'');
/* If here for an expression, and parsed no attrs, back
off. */
}
switch (PL_expect) {
case XTERM:
+ case XTERMORDORDOR:
PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
PL_lex_allbrackets++;
OPERATOR(HASHBRACK);
Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
CopLINE_inc(PL_curcop);
}
+ d = s;
+ if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
+ s++;
if (!PL_lex_allbrackets && PL_lex_fakeeof >=
(*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
+ s = d;
s--;
TOKEN(0);
}
- PL_parser->saw_infix_sigil = 1;
- BAop(OP_BIT_AND);
+ if (d == s) {
+ PL_parser->saw_infix_sigil = 1;
+ BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
+ }
+ else
+ BAop(OP_SBIT_AND);
}
PL_tokenbuf[0] = '&';
s = scan_ident(s - 1, PL_tokenbuf + 1,
sizeof PL_tokenbuf - 1, TRUE);
+ pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
if (PL_tokenbuf[1]) {
- PL_expect = XOPERATOR;
force_ident_maybe_lex('&');
}
else
PREREF('&');
- pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
TERM('&');
case '|':
AOPERATOR(OROR);
}
s--;
+ d = s;
+ if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
+ s++;
if (!PL_lex_allbrackets && PL_lex_fakeeof >=
(*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
- s--;
+ s = d - 1;
TOKEN(0);
}
- BOop(OP_BIT_OR);
+ BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
case '=':
s++;
{
const char tmp = *s++;
if (tmp == '=') {
- if (!PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+ if (!PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+ {
s -= 2;
TOKEN(0);
}
Eop(OP_EQ);
}
if (tmp == '>') {
- if (!PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
+ if (!PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
+ {
s -= 2;
TOKEN(0);
}
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
"Reversed %c= operator",(int)tmp);
s--;
- if (PL_expect == XSTATE && isALPHA(tmp) &&
- (s == PL_linestart+1 || s[-2] == '\n') )
- {
- if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
- || PL_lex_state != LEX_NORMAL) {
- d = PL_bufend;
- while (s < d) {
- if (*s++ == '\n') {
- incline(s);
- if (strnEQ(s,"=cut",4)) {
- s = strchr(s,'\n');
- if (s)
- s++;
- else
- s = d;
- incline(s);
- goto retry;
- }
- }
- }
- goto retry;
- }
- s = PL_bufend;
- PL_parser->in_pod = 1;
- goto retry;
- }
+ if (PL_expect == XSTATE
+ && isALPHA(tmp)
+ && (s == PL_linestart+1 || s[-2] == '\n') )
+ {
+ if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
+ || PL_lex_state != LEX_NORMAL) {
+ d = PL_bufend;
+ while (s < d) {
+ if (*s++ == '\n') {
+ incline(s);
+ if (strnEQ(s,"=cut",4)) {
+ s = strchr(s,'\n');
+ if (s)
+ s++;
+ else
+ s = d;
+ incline(s);
+ goto retry;
+ }
+ }
+ }
+ goto retry;
+ }
+ s = PL_bufend;
+ PL_parser->in_pod = 1;
+ goto retry;
+ }
}
if (PL_expect == XBLOCK) {
const char *t = s;
while (t < PL_bufend && isSPACE(*t))
++t;
- if (*t == '/' || *t == '?' ||
- ((*t == 'm' || *t == 's' || *t == 'y')
- && !isWORDCHAR(t[1])) ||
- (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
+ if (*t == '/' || *t == '?'
+ || ((*t == 'm' || *t == 's' || *t == 'y')
+ && !isWORDCHAR(t[1]))
+ || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
"!=~ should be !~");
}
- if (!PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+ if (!PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+ {
s -= 2;
TOKEN(0);
}
{
char tmp = *s++;
if (tmp == '<') {
- if (*s == '=' && !PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+ if (*s == '=' && !PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+ {
s -= 2;
TOKEN(0);
}
if (tmp == '=') {
tmp = *s++;
if (tmp == '>') {
- if (!PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+ if (!PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+ {
s -= 3;
TOKEN(0);
}
Eop(OP_NCMP);
}
s--;
- if (!PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+ if (!PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+ {
s -= 2;
TOKEN(0);
}
{
const char tmp = *s++;
if (tmp == '>') {
- if (*s == '=' && !PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+ if (*s == '=' && !PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+ {
s -= 2;
TOKEN(0);
}
SHop(OP_RIGHT_SHIFT);
}
else if (tmp == '=') {
- if (!PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
+ if (!PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
+ {
s -= 2;
TOKEN(0);
}
PL_tokenbuf[0] = '@';
s = scan_ident(s + 1, PL_tokenbuf + 1,
sizeof PL_tokenbuf - 1, FALSE);
- if (PL_expect == XOPERATOR)
- no_op("Array length", s);
+ if (PL_expect == XOPERATOR) {
+ d = s;
+ if (PL_bufptr > s) {
+ d = PL_bufptr-1;
+ PL_bufptr = PL_oldbufptr;
+ }
+ no_op("Array length", d);
+ }
if (!PL_tokenbuf[1])
PREREF(DOLSHARP);
PL_expect = XOPERATOR;
PL_tokenbuf[0] = '$';
s = scan_ident(s, PL_tokenbuf + 1,
sizeof PL_tokenbuf - 1, FALSE);
- if (PL_expect == XOPERATOR)
- no_op("Scalar", s);
+ if (PL_expect == XOPERATOR) {
+ d = s;
+ if (PL_bufptr > s) {
+ d = PL_bufptr-1;
+ PL_bufptr = PL_oldbufptr;
+ }
+ no_op("Scalar", d);
+ }
if (!PL_tokenbuf[1]) {
if (s == PL_bufend)
yyerror("Final $ should be \\$ or $name");
char *t = s+1;
while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
- t++;
+ t += UTF ? UTF8SKIP(t) : 1;
if (*t++ == ',') {
PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
while (t < PL_bufend && *t != ']')
t++;
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Multidimensional syntax %.*s not supported",
- (int)((t - PL_bufptr) + 1), PL_bufptr);
+ "Multidimensional syntax %"UTF8f" not supported",
+ UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
}
}
}
}
else if (PL_expect == XOPERATOR) {
s++;
- if (*s == '=' && !PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+ if (*s == '=' && !PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+ {
s--;
TOKEN(0);
}
case '?': /* conditional */
s++;
- if (!PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
+ if (!PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
+ {
s--;
TOKEN(0);
}
if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
char tmp = *s++;
if (*s == tmp) {
- if (!PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
+ if (!PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
+ {
s--;
TOKEN(0);
}
pl_yylval.ival = 0;
OPERATOR(DOTDOT);
}
- if (*s == '=' && !PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
+ if (*s == '=' && !PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+ {
s--;
TOKEN(0);
}
case '`':
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
- DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
+ DEBUG_T( {
+ if (s)
+ printbuf("### Saw backtick string before %s\n", s);
+ else
+ PerlIO_printf(Perl_debug_log,
+ "### Saw unterminated backtick string\n");
+ } );
if (PL_expect == XOPERATOR)
no_op("Backticks",s);
if (!s)
}
/* avoid v123abc() or $h{v1}, allow C<print v10;> */
if (!isALPHA(*start) && (PL_expect == XTERM
- || PL_expect == XSTATE
+ || PL_expect == XREF || PL_expect == XSTATE
|| PL_expect == XTERMORDORDOR)) {
GV *const gv = gv_fetchpvn_flags(s, start - s,
UTF ? SVf_UTF8 : 0, SVt_PVCV);
char tmpbuf[sizeof PL_tokenbuf + 1];
*tmpbuf = '&';
Copy(PL_tokenbuf, tmpbuf+1, len, char);
- off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
+ off = pad_findmy_pvn(tmpbuf, len+1, 0);
if (off != NOT_IN_PAD) {
assert(off); /* we assume this is boolean-true below */
if (PAD_COMPNAME_FLAGS_isOUR(off)) {
CV *cv;
if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
(UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
- SVt_PVCV)) &&
- (cv = GvCVu(gv)))
+ SVt_PVCV))
+ && (cv = GvCVu(gv)))
{
if (GvIMPORTED_CV(gv))
ogv = gv;
else if (! CvMETHOD(cv))
hgv = gv;
}
- if (!ogv &&
- (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
- len, FALSE)) &&
- (gv = *gvp) && (
- isGV_with_GP(gv)
- ? GvCVu(gv) && GvIMPORTED_CV(gv)
- : SvPCS_IMPORTED(gv)
- && (gv_init(gv, PL_globalstash, PL_tokenbuf,
- len, 0), 1)
- ))
+ if (!ogv
+ && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
+ len, FALSE))
+ && (gv = *gvp)
+ && (isGV_with_GP(gv)
+ ? GvCVu(gv) && GvIMPORTED_CV(gv)
+ : SvPCS_IMPORTED(gv)
+ && (gv_init(gv, PL_globalstash, PL_tokenbuf,
+ len, 0), 1)))
{
ogv = gv;
}
in which case Foo is a bareword
(and a package name). */
- if (len > 2 &&
- PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
+ if (len > 2
+ && PL_tokenbuf[len - 2] == ':'
+ && PL_tokenbuf[len - 1] == ':')
{
if (ckWARN(WARN_BAREWORD)
&& ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
/* See if it's the indirect object for a list operator. */
- if (PL_oldoldbufptr &&
- PL_oldoldbufptr < PL_bufptr &&
- (PL_oldoldbufptr == PL_last_lop
- || PL_oldoldbufptr == PL_last_uni) &&
- /* NO SKIPSPACE BEFORE HERE! */
- (PL_expect == XREF ||
- ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
+ if (PL_oldoldbufptr
+ && PL_oldoldbufptr < PL_bufptr
+ && (PL_oldoldbufptr == PL_last_lop
+ || PL_oldoldbufptr == PL_last_uni)
+ && /* NO SKIPSPACE BEFORE HERE! */
+ (PL_expect == XREF
+ || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
+ == OA_FILEREF))
{
bool immediate_paren = *s == '(';
/* Two barewords in a row may indicate method call. */
- if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
- (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
+ if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
+ && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
+ {
goto method;
}
/* Also, if "_" follows a filetest operator, it's a bareword */
if (
- ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
- (!cv &&
- (PL_last_lop_op != OP_MAPSTART &&
- PL_last_lop_op != OP_GREPSTART))))
+ ( !immediate_paren && (PL_last_lop_op == OP_SORT
+ || (!cv
+ && (PL_last_lop_op != OP_MAPSTART
+ && PL_last_lop_op != OP_GREPSTART))))
|| (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
- && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
+ && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
+ == OA_FILESTATOP))
)
{
PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
op_free(rv2cv_op);
PL_last_lop = PL_oldbufptr;
PL_last_lop_op = OP_METHOD;
- if (!PL_lex_allbrackets &&
- PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+ if (!PL_lex_allbrackets
+ && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+ {
PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+ }
PL_expect = XBLOCKTERM;
PL_bufptr = s;
return REPORT(METHOD);
else SvUTF8_off(sv);
}
op_free(rv2cv_op);
- if (tmp == METHOD && !PL_lex_allbrackets &&
- PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+ if (tmp == METHOD && !PL_lex_allbrackets
+ && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+ {
PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+ }
return REPORT(tmp);
}
sv_setpvs(PL_subname, "__ANON__");
else
sv_setpvs(PL_subname, "__ANON__::__ANON__");
- if (!PL_lex_allbrackets &&
- PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+ if (!PL_lex_allbrackets
+ && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+ {
PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+ }
PREBLOCK(LSTOPSUB);
}
}
NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
PL_expect = XTERM;
force_next(off ? PRIVATEREF : WORD);
- if (!PL_lex_allbrackets &&
- PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+ if (!PL_lex_allbrackets
+ && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+ {
PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+ }
TOKEN(NOAMP);
}
if (!GvIO(gv))
GvIOp(gv) = newIO();
IoIFP(GvIOp(gv)) = PL_rsfp;
-#if defined(HAS_FCNTL) && defined(F_SETFD)
+#if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
{
const int fd = PerlIO_fileno(PL_rsfp);
- fcntl(fd,F_SETFD,fd >= 3);
+ if (fd >= 3) {
+ fcntl(fd,F_SETFD, FD_CLOEXEC);
+ }
}
#endif
/* Mark this internal pseudo-handle as clean */
if (!IN_BYTES) {
if (UTF)
PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
- else if (PL_encoding) {
+ else if (IN_ENCODING) {
SV *name;
dSP;
ENTER;
SAVETMPS;
PUSHMARK(sp);
- XPUSHs(PL_encoding);
+ XPUSHs(_get_encoding());
PUTBACK;
call_method("name", G_SCALAR);
SPAGAIN;
}
case KEY___SUB__:
- FUN0OP(newPVOP(OP_RUNCV,0,NULL));
+ FUN0OP(CvCLONE(PL_compcv)
+ ? newOP(OP_RUNCV, 0)
+ : newPVOP(OP_RUNCV,0,NULL));
case KEY_AUTOLOAD:
case KEY_DESTROY:
if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
char *p = s;
- if ((PL_bufend - p) >= 3 &&
- strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
+ if ((PL_bufend - p) >= 3
+ && strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
+ {
p += 2;
- else if ((PL_bufend - p) >= 4 &&
- strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
+ }
+ else if ((PL_bufend - p) >= 4
+ && strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
p += 3;
p = skipspace(p);
/* skip optional package name, as in "for my abc $x (..)" */
case KEY_our:
case KEY_my:
case KEY_state:
+ if (PL_in_my) {
+ yyerror(Perl_form(aTHX_
+ "Can't redeclare \"%s\" in \"%s\"",
+ tmp == KEY_my ? "my" :
+ tmp == KEY_state ? "state" : "our",
+ PL_in_my == KEY_my ? "my" :
+ PL_in_my == KEY_state ? "state" : "our"));
+ }
PL_in_my = (U16)tmp;
s = skipspace(s);
if (isIDFIRST_lazy_if(s,UTF)) {
if (*s == '(' || (s = skipspace(s), *s == '('))
FUN1(OP_NOT);
else {
- if (!PL_lex_allbrackets &&
- PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+ if (!PL_lex_allbrackets
+ && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
+ {
PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
+ }
OPERATOR(NOTOP);
}
}
if (!words)
words = newNULLLIST();
- if (PL_lex_stuff) {
- SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = NULL;
- }
+ SvREFCNT_dec_NN(PL_lex_stuff);
+ PL_lex_stuff = NULL;
PL_expect = XOPERATOR;
pl_yylval.opval = sawparens(words);
TOKEN(QWLIST);
d = s;
s = skipspace(s);
- if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
- (*s == ':' && s[1] == ':'))
+ if (isIDFIRST_lazy_if(s,UTF)
+ || *s == '\''
+ || (*s == ':' && s[1] == ':'))
{
PL_expect = XBLOCK;
*PL_tokenbuf = '&';
if (memchr(tmpbuf, ':', len) || key != KEY_sub
|| pad_findmy_pvn(
- PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
+ PL_tokenbuf, len + 1, 0
) != NOT_IN_PAD)
sv_setpvn(PL_subname, tmpbuf, len);
else {
if (*s == ':' && s[1] != ':')
PL_expect = attrful;
- else if ((*s != '{' && *s != '(') && key == KEY_sub) {
+ else if ((*s != '{' && *s != '(') && key != KEY_format) {
+ assert(key == KEY_sub || key == KEY_AUTOLOAD ||
+ key == KEY_DESTROY || key == KEY_BEGIN ||
+ key == KEY_UNITCHECK || key == KEY_CHECK ||
+ key == KEY_INIT || key == KEY_END ||
+ key == KEY_my || key == KEY_state ||
+ key == KEY_our);
if (!have_name)
Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
else if (*s != ';' && *s != '}')
case KEY_x:
if (PL_expect == XOPERATOR) {
- if (*s == '=' && !PL_lex_allbrackets &&
- PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+ if (*s == '=' && !PL_lex_allbrackets
+ && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
+ {
return REPORT(0);
+ }
Mop(OP_REPEAT);
}
check_uni();
if (!has_colon) {
if (!PL_in_my)
tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
- UTF ? SVf_UTF8 : 0);
+ 0);
if (tmp != NOT_IN_PAD) {
/* might be an "our" variable" */
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
and @foo isn't a variable we can find in the symbol
table.
*/
- if (ckWARN(WARN_AMBIGUOUS) &&
- pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
+ if (ckWARN(WARN_AMBIGUOUS)
+ && pit == '@'
+ && PL_lex_state != LEX_NORMAL
+ && !PL_lex_brackets)
+ {
GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
/* DO NOT warn for @- and @+ */
- && !( PL_tokenbuf[2] == '\0' &&
- ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
+ && !( PL_tokenbuf[2] == '\0'
+ && ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
)
{
/* Downgraded from fatal to warning 20000522 mjd */
char tmpbuf[256];
Copy(w, tmpbuf+1, s - w, char);
*tmpbuf = '&';
- off = pad_findmy_pvn(tmpbuf, s-w+1, UTF ? SVf_UTF8 : 0);
+ off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
if (off != NOT_IN_PAD) return;
}
Perl_croak(aTHX_ "No comma allowed after %s", what);
yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
return SvREFCNT_inc_simple_NN(sv);
}
-now_ok:
+ now_ok:
cv = *cvp;
if (!pv && s)
pv = newSVpvn_flags(s, len, SVs_TEMP);
return s;
}
+/* Is the character pointed to by 's' a legal single character identifier
+ * name? 'is_utf8' is true iff Unicode semantics are to be used. The legal
+ * ones are any of:
+ * a) all ASCII characters except:
+ * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
+ * 2) '{'
+ * The final case currently doesn't get this far in the program, so we
+ * don't test for it. If that were to change, it would be ok to allow it.
+ * b) When not under Unicode rules, any upper Latin1 character
+ * c) Otherwise, when Unicode rules are used, all XIDS characters.
+ *
+ * Because all ASCII characters have the same representation whether
+ * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
+ * '{' without knowing if it is UTF-8 or not.
+ * EBCDIC already uses the rules that ASCII platforms will use after the
+ * deprecation cycle; see comment below about the deprecation. */
+#ifdef EBCDIC
+# define VALID_LEN_ONE_IDENT(s, is_utf8) \
+ (isGRAPH_A(*(s)) || ((is_utf8) \
+ ? isIDFIRST_utf8((U8*) (s)) \
+ : (isGRAPH_L1(*s) \
+ && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
+#else
+# define VALID_LEN_ONE_IDENT(s, is_utf8) \
+ (isGRAPH_A(*(s)) || ((is_utf8) \
+ ? isIDFIRST_utf8((U8*) (s)) \
+ : ! isASCII_utf8((U8*) (s))))
+#endif
+
STATIC char *
S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
{
PERL_ARGS_ASSERT_SCAN_IDENT;
- if (isSPACE(*s))
+ if (isSPACE(*s) || !*s)
s = skipspace(s);
if (isDIGIT(*s)) {
while (isDIGIT(*s)) {
*d++ = *s++;
}
}
- else {
+ else { /* See if it is a "normal" identifier */
parse_ident(&s, &d, e, 1, is_utf8);
}
*d = '\0';
PL_lex_state = LEX_INTERPENDMAYBE;
return s;
}
- if (*s == '$' && s[1] &&
- (isIDFIRST_lazy_if(s+1,is_utf8)
- || isDIGIT_A((U8)s[1])
- || s[1] == '$'
- || s[1] == '{'
- || strnEQ(s+1,"::",2)) )
+
+ /* Here, it is not a run-of-the-mill identifier name */
+
+ if (*s == '$' && s[1]
+ && (isIDFIRST_lazy_if(s+1,is_utf8)
+ || isDIGIT_A((U8)s[1])
+ || s[1] == '$'
+ || s[1] == '{'
+ || strnEQ(s+1,"::",2)) )
{
/* Dereferencing a value in a scalar variable.
The alternatives are different syntaxes for a scalar variable.
s = skipspace(s);
}
}
-
-/* Is the byte 'd' a legal single character identifier name? 'u' is true
- * iff Unicode semantics are to be used. The legal ones are any of:
- * a) all ASCII characters except:
- * 1) space-type ones, like \t and SPACE;
- 2) NUL;
- * 3) '{'
- * The final case currently doesn't get this far in the program, so we
- * don't test for it. If that were to change, it would be ok to allow it.
- * c) When not under Unicode rules, any upper Latin1 character
- * d) Otherwise, when unicode rules are used, all XIDS characters.
- *
- * Because all ASCII characters have the same representation whether
- * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
- * '{' without knowing if is UTF-8 or not */
-#ifdef EBCDIC
-# define VALID_LEN_ONE_IDENT(s, is_utf8) \
- (isGRAPH_A(*(s)) || ((is_utf8) \
- ? isIDFIRST_utf8((U8*) (s)) \
- : (isGRAPH_L1(*s) \
- && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
-#else
-# define VALID_LEN_ONE_IDENT(s, is_utf8) (! isSPACE_A(*(s)) \
- && LIKELY(*(s) != '\0') \
- && (! is_utf8 \
- || isASCII_utf8((U8*) (s)) \
- || isIDFIRST_utf8((U8*) (s))))
-#endif
if ((s <= PL_bufend - (is_utf8)
? UTF8SKIP(s)
: 1)
: (! isGRAPH_L1( (U8) *s)
|| UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
{
- /* Split messages for back compat */
- if (isCNTRL_A( (U8) *s)) {
- deprecate("literal control characters in variable names");
- }
- else {
- deprecate("literal non-graphic characters in variable names");
- }
+ deprecate("literal non-graphic characters in variable names");
}
-
+
if (is_utf8) {
const STRLEN skip = UTF8SKIP(s);
STRLEN i;
/* if it starts as a valid identifier, assume that it is one.
(the later check for } being at the expected point will trap
cases where this doesn't pan out.) */
- d += is_utf8 ? UTF8SKIP(d) : 1;
- parse_ident(&s, &d, e, 1, is_utf8);
+ d += is_utf8 ? UTF8SKIP(d) : 1;
+ parse_ident(&s, &d, e, 1, is_utf8);
*d = '\0';
tmp_copline = CopLINE(PL_curcop);
if (s < PL_bufend && isSPACE(*s)) {
PL_expect = XREF;
}
if (PL_lex_state == LEX_NORMAL) {
- if (ckWARN(WARN_AMBIGUOUS) &&
- (keyword(dest, d - dest, 0)
- || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
+ if (ckWARN(WARN_AMBIGUOUS)
+ && (keyword(dest, d - dest, 0)
+ || get_cvn_flags(dest, d - dest, is_utf8
+ ? SVf_UTF8
+ : 0)))
{
SV *tmp = newSVpvn_flags( dest, d - dest,
- SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
+ SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
if (funny == '#')
funny = '@';
orig_copline = CopLINE(PL_curcop);
first_line = CopLINE(PL_curcop);
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
if (!s) {
- if (PL_lex_stuff) {
- SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = NULL;
- }
+ SvREFCNT_dec_NN(PL_lex_stuff);
+ PL_lex_stuff = NULL;
Perl_croak(aTHX_ "Substitution replacement not terminated");
}
PL_multi_start = first_start; /* so whole substitution is taken together */
s = scan_str(s,FALSE,FALSE,FALSE,NULL);
if (!s) {
- if (PL_lex_stuff) {
- SvREFCNT_dec(PL_lex_stuff);
- PL_lex_stuff = NULL;
- }
+ SvREFCNT_dec_NN(PL_lex_stuff);
+ PL_lex_stuff = NULL;
Perl_croak(aTHX_ "Transliteration replacement not terminated");
}
term = '"';
if (!isWORDCHAR_lazy_if(s,UTF))
deprecate("bare << to mean <<\"\"");
- for (; isWORDCHAR_lazy_if(s,UTF); s++) {
- if (d < e)
- *d++ = *s;
+ peek = s;
+ while (isWORDCHAR_lazy_if(peek,UTF)) {
+ peek += UTF ? UTF8SKIP(peek) : 1;
}
+ len = (peek - s >= e - d) ? (e - d) : (peek - s);
+ Copy(s, d, len, char);
+ s += len;
+ d += len;
}
if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
Perl_croak(aTHX_ "Delimiter for here document is too long");
lexing scope. In a file, we will have broken out of the
loop in the previous iteration. In an eval, the string buf-
fer ends with "\n;", so the while condition above will have
- evaluated to false. So shared can never be null. */
- assert(shared);
+ evaluated to false. So shared can never be null. Or so you
+ might think. Odd syntax errors like s;@{<<; can gobble up
+ the implicit semicolon at the end of a file, causing the
+ file handle to be closed even when we are not in a string
+ eval. So shared may be null in that case. */
+ if (UNLIKELY(!shared))
+ goto interminable;
/* A LEXSHARED struct with a null ls_prev pointer is the outer-
most lexing scope. In a file, shared->ls_linestr at that
level is just one line, so there is no body to steal. */
linestr = shared->ls_linestr;
bufend = SvEND(linestr);
d = s;
- while (s < bufend - len + 1 &&
- memNE(s,PL_tokenbuf,len) ) {
+ while (s < bufend - len + 1
+ && memNE(s,PL_tokenbuf,len) )
+ {
if (*s++ == '\n')
++PL_parser->herelines;
}
bufend - shared->re_eval_start);
shared->re_eval_start -= s-d;
}
- if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
- CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
- cx->blk_eval.cur_text == linestr)
+ if (cxstack_ix >= 0
+ && CxTYPE(cx) == CXt_EVAL
+ && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
+ && cx->blk_eval.cur_text == linestr)
{
cx->blk_eval.cur_text = newSVsv(linestr);
SvSCREAM_on(cx->blk_eval.cur_text);
origline + 1 + PL_parser->herelines);
if (!lex_next_chunk(LEX_NO_TERM)
&& (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
- SvREFCNT_dec(linestr_save);
+ /* Simply freeing linestr_save might seem simpler here, as it
+ does not matter what PL_linestr points to, since we are
+ about to croak; but in a quote-like op, linestr_save
+ will have been prospectively freed already, via
+ SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
+ restore PL_linestr. */
+ SvREFCNT_dec_NN(PL_linestr);
+ PL_linestr = linestr_save;
goto interminable;
}
CopLINE_set(PL_curcop, origline);
PL_last_lop = PL_last_uni = NULL;
#ifndef PERL_STRICT_CR
if (PL_bufend - PL_linestart >= 2) {
- if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
- (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
+ if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
+ || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
{
PL_bufend[-2] = '\n';
PL_bufend--;
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
SvUTF8_on(tmpstr);
- else if (PL_encoding)
- sv_recode_to_utf8(tmpstr, PL_encoding);
+ else if (IN_ENCODING)
+ sv_recode_to_utf8(tmpstr, _get_encoding());
}
PL_lex_stuff = tmpstr;
pl_yylval.ival = op_type;
/* try to find it in the pad for this block, otherwise find
add symbol table ops
*/
- const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
+ const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
if (tmp != NOT_IN_PAD) {
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
else {
GV *gv;
++d;
-intro_sym:
+ intro_sym:
gv = gv_fetchpv(d,
GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
SVt_PV);
sv_catpvn(sv, s, termlen);
s += termlen;
for (;;) {
- if (PL_encoding && !UTF && !re_reparse) {
+ if (IN_ENCODING && !UTF && !re_reparse) {
bool cont = TRUE;
while (cont) {
int offset = s - SvPVX_const(PL_linestr);
- const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
+ const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
&offset, (char*)termstr, termlen);
const char *ns;
char *svlast;
COPLINE_INC_WITH_HERELINES;
/* backslashes can escape the open or closing characters */
if (*s == '\\' && s+1 < PL_bufend) {
- if (!keep_bracketed_quoted &&
- ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
+ if (!keep_bracketed_quoted
+ && ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
{
s++;
}
#ifndef PERL_STRICT_CR
if (to - SvPVX_const(sv) >= 2) {
- if ((to[-2] == '\r' && to[-1] == '\n') ||
- (to[-2] == '\n' && to[-1] == '\r'))
+ if ( (to[-2] == '\r' && to[-1] == '\n')
+ || (to[-2] == '\n' && to[-1] == '\r'))
{
to[-2] = '\n';
to--;
/* at this point, we have successfully read the delimited string */
- if (!PL_encoding || UTF || re_reparse) {
+ if (!IN_ENCODING || UTF || re_reparse) {
if (keep_delims)
sv_catpvn(sv, s, termlen);
s += termlen;
}
- if (has_utf8 || (PL_encoding && !re_reparse))
+ if (has_utf8 || (IN_ENCODING && !re_reparse))
SvUTF8_on(sv);
PL_multi_end = CopLINE(PL_curcop);
hexfp_exp *= 10;
hexfp_exp += *h - '0';
#ifdef NV_MIN_EXP
- if (negexp &&
- -hexfp_exp < NV_MIN_EXP - 1) {
+ if (negexp
+ && -hexfp_exp < NV_MIN_EXP - 1) {
Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
"Hexadecimal float: exponent underflow");
-#endif
break;
}
- else {
+#endif
#ifdef NV_MAX_EXP
- if (!negexp &&
- hexfp_exp > NV_MAX_EXP - 1) {
- Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
+ if (!negexp
+ && hexfp_exp > NV_MAX_EXP - 1) {
+ Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
"Hexadecimal float: exponent overflow");
- break;
- }
-#endif
+ break;
}
+#endif
}
h++;
}
}
/* read next group of digits and _ and copy into d */
- while (isDIGIT(*s) || *s == '_' ||
- UNLIKELY(hexfp && isXDIGIT(*s))) {
+ while (isDIGIT(*s)
+ || *s == '_'
+ || UNLIKELY(hexfp && isXDIGIT(*s)))
+ {
/* skip underscores, checking for misplaced ones
if -w is on
*/
/* copy, ignoring underbars, until we run out of digits.
*/
- for (; isDIGIT(*s) || *s == '_' ||
- UNLIKELY(hexfp && isXDIGIT(*s));
- s++) {
+ for (; isDIGIT(*s)
+ || *s == '_'
+ || UNLIKELY(hexfp && isXDIGIT(*s));
+ s++)
+ {
/* fixed length buffer check */
if (d >= e)
Perl_croak(aTHX_ "%s", number_too_long);
*d++ = *s++;
}
else {
- if (((lastub && s == lastub + 1) ||
- (!isDIGIT(s[1]) && s[1] != '_')))
+ if (((lastub && s == lastub + 1)
+ || (!isDIGIT(s[1]) && s[1] != '_')))
Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
"Misplaced _ in number");
lastub = s++;
floatit = TRUE;
}
if (floatit) {
- STORE_NUMERIC_LOCAL_SET_STANDARD();
+ STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
/* terminate the string */
*d = '\0';
if (UNLIKELY(hexfp)) {
} else {
nv = Atof(PL_tokenbuf);
}
- RESTORE_NUMERIC_LOCAL();
+ RESTORE_LC_NUMERIC_UNDERLYING();
sv = newSVnv(nv);
}
/* if it starts with a v, it could be a v-string */
case 'v':
-vstring:
+ vstring:
sv = newSV(5); /* preallocate storage space */
ENTER_with_name("scan_vstring");
SAVEFREESV(sv);
if (needargs) {
const char *s2 = s;
while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
- || *s2 == 013)
+ || *s2 == '\v')
s2++;
if (*s2 == '{') {
PL_expect = XTERMBLOCK;
if (!IN_BYTES) {
if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
SvUTF8_on(stuff);
- else if (PL_encoding)
- sv_recode_to_utf8(stuff, PL_encoding);
+ else if (IN_ENCODING)
+ sv_recode_to_utf8(stuff, _get_encoding());
}
NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
force_next(THING);
CvFLAGS(PL_compcv) |= flags;
PL_subline = CopLINE(PL_curcop);
- CvPADLIST_set(PL_compcv, pad_new(padnew_SAVE|padnew_SAVESUB));
+ CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
if (outsidecv && CvPADLIST(outsidecv))
- CvPADLIST(PL_compcv)->xpadl_outid =
- PadlistNAMES(CvPADLIST(outsidecv));
+ CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
return oldsavestack_ix;
}
PL_in_eval |= EVAL_WARNONLY;
yyerror_pv(s, flags);
- PL_in_eval &= ~EVAL_WARNONLY;
return 0;
}
if (!yychar || (yychar == ';' && !PL_rsfp))
sv_catpvs(where_sv, "at EOF");
- else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
- PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
- PL_oldbufptr != PL_bufptr) {
+ else if ( PL_oldoldbufptr
+ && PL_bufptr > PL_oldoldbufptr
+ && PL_bufptr - PL_oldoldbufptr < 200
+ && PL_oldoldbufptr != PL_oldbufptr
+ && PL_oldbufptr != PL_bufptr)
+ {
/*
Only for NetWare:
The code below is removed for NetWare because it abends/crashes on NetWare
context = PL_oldoldbufptr;
contlen = PL_bufptr - PL_oldoldbufptr;
}
- else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
- PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
+ else if ( PL_oldbufptr
+ && PL_bufptr > PL_oldbufptr
+ && PL_bufptr - PL_oldbufptr < 200
+ && PL_oldbufptr != PL_bufptr) {
/*
Only for NetWare:
The code below is removed for NetWare because it abends/crashes on NetWare
}
else if (yychar > 255)
sv_catpvs(where_sv, "next token ???");
- else if (yychar == -2) { /* YYEMPTY */
- if (PL_lex_state == LEX_NORMAL ||
- (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
+ else if (yychar == YYEMPTY) {
+ if ( PL_lex_state == LEX_NORMAL
+ || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
sv_catpvs(where_sv, "at end of line");
else if (PL_lex_inpat)
sv_catpvs(where_sv, "within pattern");
PL_multi_end = 0;
}
if (PL_in_eval & EVAL_WARNONLY) {
+ PL_in_eval &= ~EVAL_WARNONLY;
Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
}
else
down to the bit shift operators. The expression must be followed (and thus
terminated) either by a comparison or lower-precedence operator or by
something that would normally terminate an expression such as semicolon.
-If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
+If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
otherwise it is mandatory. It is up to the caller to ensure that the
dynamic parser state (L</PL_parser> et al) is correctly set to reflect
the source of the code to be parsed and the lexical context for the
down to the assignment operators. The expression must be followed (and thus
terminated) either by a comma or lower-precedence operator or by
something that would normally terminate an expression such as semicolon.
-If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
+If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
otherwise it is mandatory. It is up to the caller to ensure that the
dynamic parser state (L</PL_parser> et al) is correctly set to reflect
the source of the code to be parsed and the lexical context for the
down to the comma operator. The expression must be followed (and thus
terminated) either by a low-precedence logic operator such as C<or> or by
something that would normally terminate an expression such as semicolon.
-If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
+If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
otherwise it is mandatory. It is up to the caller to ensure that the
dynamic parser state (L</PL_parser> et al) is correctly set to reflect
the source of the code to be parsed and the lexical context for the
as C<or>. The expression must be followed (and thus terminated) by a
token that an expression would normally be terminated by: end-of-file,
closing bracketing punctuation, semicolon, or one of the keywords that
-signals a postfix expression-statement modifier. If I<flags> includes
-C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
+signals a postfix expression-statement modifier. If C<flags> has the
+C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
mandatory. It is up to the caller to ensure that the dynamic parser
state (L</PL_parser> et al) is correctly set to reflect the source of
the code to be parsed and the lexical context for the expression.
level of parsing which covers all the compilation errors that occurred.
Some compilation errors, however, will throw an exception immediately.
-The I<flags> parameter is reserved for future use, and must always
+The C<flags> parameter is reserved for future use, and must always
be zero.
=cut
level of parsing which covers all the compilation errors that occurred.
Some compilation errors, however, will throw an exception immediately.
-The I<flags> parameter is reserved for future use, and must always
+The C<flags> parameter is reserved for future use, and must always
be zero.
=cut
Parse a single label, possibly optional, of the type that may prefix a
Perl statement. It is up to the caller to ensure that the dynamic parser
state (L</PL_parser> et al) is correctly set to reflect the source of
-the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
+the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
label is optional, otherwise it is mandatory.
The name of the label is returned in the form of a fresh scalar. If an
level of parsing which covers all the compilation errors that occurred.
Some compilation errors, however, will throw an exception immediately.
-The I<flags> parameter is reserved for future use, and must always
+The C<flags> parameter is reserved for future use, and must always
be zero.
=cut
which covers all the compilation errors that occurred. Some compilation
errors, however, will throw an exception immediately.
-The I<flags> parameter is reserved for future use, and must always
+The C<flags> parameter is reserved for future use, and must always
be zero.
=cut
"lacks default expression"));
} else {
OP *defexpr = parse_termexpr(0);
- if (defexpr->op_type == OP_UNDEF &&
- !(defexpr->op_flags & OPf_KIDS)) {
+ if (defexpr->op_type == OP_UNDEF
+ && !(defexpr->op_flags & OPf_KIDS))
+ {
op_free(defexpr);
} else {
OP *ifop =
}
/*
- * Local variables:
- * c-indentation-style: bsd
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- *
* ex: set ts=8 sts=4 sw=4 et:
*/