NOOP;
if (t < PL_bufptr && isSPACE(*t))
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "\t(Do you need to predeclare %"SVf"?)\n",
- SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ "\t(Do you need to predeclare %"UTF8f"?)\n",
+ UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
}
else {
assert(s >= oldbp);
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "\t(Missing operator before %"SVf"?)\n",
- SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ "\t(Missing operator before %"UTF8f"?)\n",
+ UTF8fARG(UTF, s - oldbp, oldbp));
}
}
PL_bufptr = oldbp;
if (nl)
*nl = '\0';
}
- else if (isCNTRL(PL_multi_close)) {
+ else if ((U8) PL_multi_close < 32) {
*tmpbuf = '^';
tmpbuf[1] = (char)toCTRL(PL_multi_close);
tmpbuf[2] = '\0';
parser->linestr = flags & LEX_START_COPIED
? SvREFCNT_inc_simple_NN(line)
: newSVpvn_flags(s, len, SvUTF8(line));
- sv_catpvs(parser->linestr, "\n;");
+ sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
} else {
- parser->linestr = newSVpvs("\n;");
+ parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
}
parser->oldoldbufptr =
parser->oldbufptr =
ENTER;
SAVESPTR(PL_warnhook);
PL_warnhook = PERL_WARNHOOK_FATAL;
- utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
+ utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
LEAVE;
}
}
}
else {
assert(p < e -1 );
- *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
+ *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
p += 2;
}
}
bufend = PL_parser->bufend;
}
}
- unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
+ unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
if (retlen == (STRLEN)-1) {
/* malformed UTF-8 */
ENTER;
SAVESPTR(PL_warnhook);
PL_warnhook = PERL_WARNHOOK_FATAL;
- utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
+ utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
LEAVE;
}
return unichar;
=cut
*/
+#define LEX_NO_INCLINE 0x40000000
#define LEX_NO_NEXT_CHUNK 0x80000000
void
Perl_lex_read_space(pTHX_ U32 flags)
{
char *s, *bufend;
+ const bool can_incline = !(flags & LEX_NO_INCLINE);
bool need_incline = 0;
- if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
+ if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
#ifdef PERL_MAD
if (PL_skipwhite) {
} while (!(c == '\n' || (c == 0 && s == bufend)));
} else if (c == '\n') {
s++;
- PL_parser->linestart = s;
- if (s == bufend)
- need_incline = 1;
- else
- incline(s);
+ if (can_incline) {
+ PL_parser->linestart = s;
+ if (s == bufend)
+ need_incline = 1;
+ else
+ incline(s);
+ }
} else if (isSPACE(c)) {
s++;
} else if (c == 0 && s == bufend) {
if (flags & LEX_NO_NEXT_CHUNK)
break;
PL_parser->bufptr = s;
- COPLINE_INC_WITH_HERELINES;
+ if (can_incline) COPLINE_INC_WITH_HERELINES;
got_more = lex_next_chunk(flags);
- CopLINE_dec(PL_curcop);
+ if (can_incline) CopLINE_dec(PL_curcop);
s = PL_parser->bufptr;
bufend = PL_parser->bufend;
if (!got_more)
break;
- if (need_incline && PL_parser->rsfp) {
+ if (can_incline && need_incline && PL_parser->rsfp) {
incline(s);
need_incline = 0;
}
}
/*
+
+=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
+
+This function performs syntax checking on a prototype, C<proto>.
+If C<warn> is true, any illegal characters or mismatched brackets
+will trigger illegalproto warnings, declaring that they were
+detected in the prototype for C<name>.
+
+The return value is C<true> if this is a valid prototype, and
+C<false> if it is not, regardless of whether C<warn> was C<true> or
+C<false>.
+
+Note that C<NULL> is a valid C<proto> and will always return C<true>.
+
+=cut
+
+ */
+
+bool
+Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
+{
+ STRLEN len, origlen;
+ char *p = proto ? SvPV(proto, len) : NULL;
+ bool bad_proto = FALSE;
+ bool in_brackets = FALSE;
+ bool after_slash = FALSE;
+ char greedy_proto = ' ';
+ bool proto_after_greedy_proto = FALSE;
+ bool must_be_last = FALSE;
+ bool underscore = FALSE;
+ bool bad_proto_after_underscore = FALSE;
+
+ PERL_ARGS_ASSERT_VALIDATE_PROTO;
+
+ if (!proto)
+ return TRUE;
+
+ origlen = len;
+ for (; len--; p++) {
+ if (!isSPACE(*p)) {
+ if (must_be_last)
+ proto_after_greedy_proto = TRUE;
+ if (underscore) {
+ if (!strchr(";@%", *p))
+ bad_proto_after_underscore = TRUE;
+ underscore = FALSE;
+ }
+ if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
+ bad_proto = TRUE;
+ }
+ else {
+ if (*p == '[')
+ in_brackets = TRUE;
+ else if (*p == ']')
+ in_brackets = FALSE;
+ else if ((*p == '@' || *p == '%') &&
+ !after_slash &&
+ !in_brackets ) {
+ must_be_last = TRUE;
+ greedy_proto = *p;
+ }
+ else if (*p == '_')
+ underscore = TRUE;
+ }
+ if (*p == '\\')
+ after_slash = TRUE;
+ else
+ after_slash = FALSE;
+ }
+ }
+
+ if (warn) {
+ SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
+ p -= origlen;
+ p = SvUTF8(proto)
+ ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
+ origlen, UNI_DISPLAY_ISPRINT)
+ : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
+
+ if (proto_after_greedy_proto)
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Prototype after '%c' for %"SVf" : %s",
+ greedy_proto, SVfARG(name), p);
+ if (in_brackets)
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Missing ']' in prototype for %"SVf" : %s",
+ SVfARG(name), p);
+ if (bad_proto)
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Illegal character in prototype for %"SVf" : %s",
+ SVfARG(name), p);
+ if (bad_proto_after_underscore)
+ Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
+ "Illegal character after '_' in prototype for %"SVf" : %s",
+ SVfARG(name), p);
+ }
+
+ return (! (proto_after_greedy_proto || bad_proto) );
+}
+
+/*
* S_incline
* This subroutine has nothing to do with tilting, whether at windmills
* or pinball tables. Its name is short for "increment line". It
if (t - s > 0) {
const STRLEN len = t - s;
- SV *const temp_sv = CopFILESV(PL_curcop);
- const char *cf;
- STRLEN tmplen;
-
- if (temp_sv) {
- cf = SvPVX(temp_sv);
- tmplen = SvCUR(temp_sv);
- } else {
- cf = NULL;
- tmplen = 0;
- }
if (!PL_rsfp && !PL_parser->filtered) {
/* must copy *{"::_<(eval N)[oldfilename:L]"}
* to *{"::_<newfilename"} */
/* However, the long form of evals is only turned on by the
debugger - usually they're "(eval %lu)" */
- char smallbuf[128];
- char *tmpbuf;
- GV **gvp;
- STRLEN tmplen2 = len;
- if (tmplen + 2 <= sizeof smallbuf)
- tmpbuf = smallbuf;
- else
- Newx(tmpbuf, tmplen + 2, char);
- tmpbuf[0] = '_';
- tmpbuf[1] = '<';
- memcpy(tmpbuf + 2, cf, tmplen);
- tmplen += 2;
- gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
- if (gvp) {
+ GV * const cfgv = CopFILEGV(PL_curcop);
+ if (cfgv) {
+ char smallbuf[128];
+ STRLEN tmplen2 = len;
char *tmpbuf2;
GV *gv2;
else
Newx(tmpbuf2, tmplen2 + 2, char);
- if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
- /* Either they malloc'd it, or we malloc'd it,
- so no prefix is present in ours. */
- tmpbuf2[0] = '_';
- tmpbuf2[1] = '<';
- }
+ tmpbuf2[0] = '_';
+ tmpbuf2[1] = '<';
memcpy(tmpbuf2 + 2, s, tmplen2);
tmplen2 += 2;
alias the saved lines that are in the array.
Otherwise alias the whole array. */
if (CopLINE(PL_curcop) == line_num) {
- GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
- GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
+ GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
+ GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
}
- else if (GvAV(*gvp)) {
- AV * const av = GvAV(*gvp);
+ else if (GvAV(cfgv)) {
+ AV * const av = GvAV(cfgv);
const I32 start = CopLINE(PL_curcop)+1;
I32 items = AvFILLp(av) - start;
if (items > 0) {
if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
}
- if (tmpbuf != smallbuf) Safefree(tmpbuf);
}
CopFILE_free(PL_curcop);
CopFILE_setn(PL_curcop, s, len);
CopLINE_set(PL_curcop, line_num);
}
+#define skipspace(s) skipspace_flags(s, 0)
+
#ifdef PERL_MAD
/* skip space before PL_thistoken */
S_skipspace2(pTHX_ char *s, SV **svp)
{
char *start;
- const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
const I32 startoff = s - SvPVX(PL_linestr);
PERL_ARGS_ASSERT_SKIPSPACE2;
s = skipspace(s);
- PL_bufptr = SvPVX(PL_linestr) + bufptroff;
if (!PL_madskills || !svp)
return s;
start = SvPVX(PL_linestr) + startoff;
if (av) {
SV * const sv = newSV_type(SVt_PVMG);
if (orig_sv)
- sv_setsv(sv, orig_sv);
+ sv_setsv_flags(sv, orig_sv, 0); /* no cow */
else
sv_setpvn(sv, buf, len);
(void)SvIOK_on(sv);
SvIV_set(sv, 0);
- av_store(av, (I32)CopLINE(PL_curcop), sv);
+ av_store(av, CopLINE(PL_curcop), sv);
}
}
*/
STATIC char *
-S_skipspace(pTHX_ char *s)
+S_skipspace_flags(pTHX_ char *s, U32 flags)
{
#ifdef PERL_MAD
char *start = s;
#endif /* PERL_MAD */
- PERL_ARGS_ASSERT_SKIPSPACE;
+ PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
#ifdef PERL_MAD
if (PL_skipwhite) {
sv_free(PL_skipwhite);
} else {
STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
PL_bufptr = s;
- lex_read_space(LEX_KEEP_PREVIOUS |
+ lex_read_space(flags | LEX_KEEP_PREVIOUS |
(PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
LEX_NO_NEXT_CHUNK : 0));
s = PL_bufptr;
return THING;
}
else if (op_type == OP_BACKTICK && PL_lex_op) {
- /* readpipe() vas overriden */
+ /* readpipe() was overridden */
cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
pl_yylval.opval = PL_lex_op;
PL_lex_op = NULL;
{
/* If warnings are on, this will print a more detailed analysis of what
* is wrong than the error message below */
- utf8n_to_uvuni(first_bad_char_loc,
+ utf8n_to_uvchr(first_bad_char_loc,
e - ((char *) first_bad_char_loc),
NULL, 0);
}
s++;
} else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
- if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
+ if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
goto bad_charname;
}
s += 2;
s++;
}
else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
- if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
- *(s+1)))))
+ if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
{
goto bad_charname;
}
if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
/* If warnings are on, this will print a more detailed analysis of
* what is wrong than the error message below */
- utf8n_to_uvuni(first_bad_char_loc,
+ utf8n_to_uvchr(first_bad_char_loc,
(char *) first_bad_char_loc - str,
NULL, 0);
char *e = d++;
while (e-- > c)
*(e + 1) = *e;
- *c = (char)UTF_TO_NATIVE(0xff);
+ *c = (char) ILLEGAL_UTF8_BYTE;
/* mark the range as done, and continue */
dorange = FALSE;
didrange = TRUE;
#ifdef EBCDIC
if (literal_endpoint == 2 &&
- ((isLOWER(min) && isLOWER(max)) ||
- (isUPPER(min) && isUPPER(max)))) {
- if (isLOWER(min)) {
- for (i = min; i <= max; i++)
- if (isLOWER(i))
- *d++ = NATIVE_TO_NEED(has_utf8,i);
- } else {
- for (i = min; i <= max; i++)
- if (isUPPER(i))
- *d++ = NATIVE_TO_NEED(has_utf8,i);
+ ((isLOWER_A(min) && isLOWER_A(max)) ||
+ (isUPPER_A(min) && isUPPER_A(max))))
+ {
+ for (i = min; i <= max; i++) {
+ if (isALPHA_A(i))
+ *d++ = i;
}
}
else
for (i = min; i <= max; i++)
#ifdef EBCDIC
if (has_utf8) {
- const U8 ch = (U8)NATIVE_TO_UTF(i);
- if (UNI_IS_INVARIANT(ch))
- *d++ = (U8)i;
- else {
- *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
- *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
- }
+ append_utf8_from_native_byte(i, &d);
}
else
#endif
if (uvmax) {
d = (char*)uvchr_to_utf8((U8*)d, 0x100);
if (uvmax > 0x101)
- *d++ = (char)UTF_TO_NATIVE(0xff);
+ *d++ = (char) ILLEGAL_UTF8_BYTE;
if (uvmax > 0x100)
d = (char*)uvchr_to_utf8((U8*)d, uvmax);
}
&& !native_range
#endif
) {
- *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
+ *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
s++;
continue;
}
* char, which will be done separately.
* Stop on (?{..}) and friends */
- else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
+ else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
if (s[2] == '#') {
while (s+1 < send && *s != ')')
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+ *d++ = *s++;
}
- else if (!PL_lex_casemods && !in_charclass &&
+ else if (!PL_lex_casemods &&
( s[2] == '{' /* This should match regcomp.c */
|| (s[2] == '?' && s[3] == '{')))
{
}
/* likewise skip #-initiated comments in //x patterns */
- else if (*s == '#' && PL_lex_inpat &&
+ else if (*s == '#' && PL_lex_inpat && !in_charclass &&
((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
while (s+1 < send && *s != '\n')
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+ *d++ = *s++;
}
/* no further processing of single-quoted regex */
|| s[1] != '{'
|| regcurly(s + 1, FALSE)))
{
- *d++ = NATIVE_TO_NEED(has_utf8,'\\');
+ *d++ = '\\';
goto default_action;
}
{
I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
STRLEN len = 3;
- uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
+ uv = grok_oct(s, &len, &flags, NULL);
s += len;
if (len < 3 && s < send && isDIGIT(*s)
&& ckWARN(WARN_MISC))
* UTF-8 sequence they can end up as, except if they force us
* to recode the rest of the string into utf8 */
- /* Here uv is the ordinal of the next character being added in
- * unicode (converted from native). */
- if (!UNI_IS_INVARIANT(uv)) {
+ /* Here uv is the ordinal of the next character being added */
+ if (!NATIVE_IS_INVARIANT(uv)) {
if (!has_utf8 && uv > 255) {
/* Might need to recode whatever we have accumulated so
* far if it contains any chars variant in utf8 or
}
if (has_utf8) {
- d = (char*)uvuni_to_utf8((U8*)d, uv);
+ d = (char*)uvchr_to_utf8((U8*)d, uv);
if (PL_lex_inwhat == OP_TRANS &&
PL_sublex_info.sub_op) {
PL_sublex_info.sub_op->op_private |=
* now, while preserving the fact that it was a named character
* so that the regex compiler knows this */
- /* This section of code doesn't generally use the
- * NATIVE_TO_NEED() macro to transform the input. I (khw) did
- * a close examination of this macro and determined it is a
- * no-op except on utfebcdic variant characters. Every
- * character generated by this that would normally need to be
- * enclosed by this macro is invariant, so the macro is not
- * needed, and would complicate use of copy(). XXX There are
- * other parts of this file where the macro is used
- * inconsistently, but are saved by it being a no-op */
-
/* The structure of this section of code (besides checking for
* errors and upgrading to utf8) is:
* Further disambiguate between the two meanings of \N, and if
has_utf8 = TRUE;
}
- /* Add the string to the output */
+ /* Add the (Unicode) code point to the output. */
if (UNI_IS_INVARIANT(uv)) {
- *d++ = (char) uv;
+ *d++ = (char) LATIN1_TO_NATIVE(uv);
}
- else d = (char*)uvuni_to_utf8((U8*)d, uv);
+ else {
+ d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
+ }
}
}
else /* Here is \N{NAME} but not \N{U+...}. */
char hex_string[2 * UTF8_MAXBYTES + 5];
/* Get the first character of the result. */
- U32 uv = utf8n_to_uvuni((U8 *) str,
+ U32 uv = utf8n_to_uvchr((U8 *) str,
len,
&char_length,
UTF8_ALLOW_ANYUV);
/* Convert first code point to hex, including
- * the boiler plate before it. For all these,
- * we convert to native format so that
- * downstream code can continue to assume the
- * input is native */
+ * the boiler plate before it. */
output_length =
my_snprintf(hex_string, sizeof(hex_string),
- "\\N{U+%X",
- (unsigned int) UNI_TO_NATIVE(uv));
+ "\\N{U+%X",
+ (unsigned int) uv);
/* Make sure there is enough space to hold it */
d = off + SvGROW(sv, off
* its ordinal in hex */
while ((str += char_length) < str_end) {
const STRLEN off = d - SvPVX_const(sv);
- U32 uv = utf8n_to_uvuni((U8 *) str,
+ U32 uv = utf8n_to_uvchr((U8 *) str,
str_end - str,
&char_length,
UTF8_ALLOW_ANYUV);
output_length =
my_snprintf(hex_string,
- sizeof(hex_string),
- ".%X",
- (unsigned int) UNI_TO_NATIVE(uv));
+ sizeof(hex_string),
+ ".%X",
+ (unsigned int) uv);
d = off + SvGROW(sv, off
+ output_length
/* printf-style backslashes, formfeeds, newlines, etc */
case 'b':
- *d++ = NATIVE_TO_NEED(has_utf8,'\b');
+ *d++ = '\b';
break;
case 'n':
- *d++ = NATIVE_TO_NEED(has_utf8,'\n');
+ *d++ = '\n';
break;
case 'r':
- *d++ = NATIVE_TO_NEED(has_utf8,'\r');
+ *d++ = '\r';
break;
case 'f':
- *d++ = NATIVE_TO_NEED(has_utf8,'\f');
+ *d++ = '\f';
break;
case 't':
- *d++ = NATIVE_TO_NEED(has_utf8,'\t');
+ *d++ = '\t';
break;
case 'e':
- *d++ = ASCII_TO_NEED(has_utf8,'\033');
+ *d++ = ASCII_TO_NATIVE('\033');
break;
case 'a':
- *d++ = ASCII_TO_NEED(has_utf8,'\007');
+ *d++ = '\a';
break;
} /* end switch */
* routine that does the conversion checks for errors like
* malformed utf8 */
- const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
- const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
+ const UV nextuv = (this_utf8)
+ ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
+ : (UV) ((U8) *s);
+ const STRLEN need = UNISKIP(nextuv);
if (!has_utf8) {
SvCUR_set(sv, d - SvPVX_const(sv));
SvPOK_on(sv);
#endif
}
else {
- *d++ = NATIVE_TO_NEED(has_utf8,*s++);
+ *d++ = *s++;
}
} /* while loop to process each character */
char *d;
STRLEN len;
bool bof = FALSE;
+ const bool saw_infix_sigil = PL_parser->saw_infix_sigil;
U8 formbrack = 0;
U32 fake_eof = 0;
return yylex();
}
+ /* We really do *not* want PL_linestr ever becoming a COW. */
+ assert (!SvIsCOW(PL_linestr));
s = PL_bufptr;
PL_oldoldbufptr = PL_oldbufptr;
PL_oldbufptr = s;
+ PL_parser->saw_infix_sigil = 0;
retry:
#ifdef PERL_MAD
* check if it in fact is. */
if (bof && PL_rsfp &&
(*s == 0 ||
- *(U8*)s == 0xEF ||
+ *(U8*)s == BOM_UTF8_FIRST_BYTE ||
*(U8*)s >= 0xFE ||
s[1] == 0)) {
Off_t offset = (IV)PerlIO_tell(PL_rsfp);
s = SKIPSPACE0(s);
}
else {
-/* if (PL_madskills && PL_lex_formbrack) { */
- d = s;
- while (d < PL_bufend && *d != '\n')
- d++;
- if (d < PL_bufend)
- d++;
- else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
+#endif
+ if (PL_madskills) d = s;
+ while (s < PL_bufend && *s != '\n')
+ s++;
+ if (s < PL_bufend)
+ {
+ s++;
+ if (s < PL_bufend)
+ incline(s);
+ }
+ else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
Perl_croak(aTHX_ "panic: input overflow");
+#ifdef PERL_MAD
if (PL_madskills && CopLINE(PL_curcop) >= 1) {
if (!PL_thiswhite)
PL_thiswhite = newSVpvs("");
sv_setpvs(PL_thiswhite, "");
PL_faketokens = 0;
}
- sv_catpvn(PL_thiswhite, s, d - s);
+ sv_catpvn(PL_thiswhite, d, s - d);
}
- s = d;
-/* }
- *s = '\0';
- PL_bufend = s; */
}
-#else
- *s = '\0';
- PL_bufend = s;
#endif
}
goto retry;
s--;
TOKEN(0);
}
+ PL_parser->saw_infix_sigil = 1;
Mop(OP_MULTIPLY);
case '%':
PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
TOKEN(0);
++s;
+ PL_parser->saw_infix_sigil = 1;
Mop(OP_MODULO);
}
PL_tokenbuf[0] = '%';
s--;
TOKEN(0);
}
+ PL_parser->saw_infix_sigil = 1;
BAop(OP_BIT_AND);
}
if (*t == ';'
&& get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "You need to quote \"%"SVf"\"",
- SVfARG(newSVpvn_flags(tmpbuf, len,
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ "You need to quote \"%"UTF8f"\"",
+ UTF8fARG(UTF, len, tmpbuf));
}
}
}
PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
/* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
- "Scalar value %"SVf" better written as $%"SVf,
- SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
- SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
+ "Scalar value %"UTF8f" better written as $%"UTF8f,
+ UTF8fARG(UTF, t-PL_bufptr, PL_bufptr),
+ UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1));
}
}
}
/* Is this a word before a => operator? */
if (*d == '=' && d[1] == '>') {
+ fat_arrow:
CLINE;
pl_yylval.opval
= (OP*)newSVOP(OP_CONST, 0,
}
}
+ if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
+ && (!anydelim || *s != '#')) {
+ /* no override, and not s### either; skipspace is safe here
+ * check for => on following line */
+ STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
+ STRLEN soff = s - SvPVX(PL_linestr);
+ s = skipspace_flags(s, LEX_NO_INCLINE);
+ if (*s == '=' && s[1] == '>') goto fat_arrow;
+ PL_bufptr = SvPVX(PL_linestr) + bufoff;
+ s = SvPVX(PL_linestr) + soff;
+ }
+
reserved_word:
switch (tmp) {
s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
TRUE, &morelen);
if (!morelen)
- Perl_croak(aTHX_ "Bad name after %"SVf"%s",
- SVfARG(newSVpvn_flags(PL_tokenbuf, len,
- (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
+ Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
+ UTF8fARG(UTF, len, PL_tokenbuf),
*s == '\'' ? "'" : "::");
len += morelen;
pkgname = 1;
if (ckWARN(WARN_BAREWORD)
&& ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
- "Bareword \"%"SVf"\" refers to nonexistent package",
- SVfARG(newSVpvn_flags(PL_tokenbuf, len,
- (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
+ "Bareword \"%"UTF8f"\" refers to nonexistent package",
+ UTF8fARG(UTF, len, PL_tokenbuf));
len -= 2;
PL_tokenbuf[len] = '\0';
gv = NULL;
d = s + 1;
while (SPACE_OR_TAB(*d))
d++;
- if (*d == ')' && (sv = cv_const_sv(cv))) {
+ if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
s = d + 1;
goto its_constant;
}
if (cv) {
if (lastchar == '-' && penultchar != '-') {
- const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
+ const STRLEN l = len ? len : strlen(PL_tokenbuf);
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
- SVfARG(tmpsv), SVfARG(tmpsv));
+ "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
+ UTF8fARG(UTF, l, PL_tokenbuf),
+ UTF8fARG(UTF, l, PL_tokenbuf));
}
/* Check for a constant sub */
- if ((sv = cv_const_sv(cv))) {
+ if ((sv = cv_const_sv_or_av(cv))) {
its_constant:
op_free(rv2cv_op);
SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
- pl_yylval.opval->op_private = OPpCONST_FOLDED;
- pl_yylval.opval->op_flags |= OPf_SPECIAL;
+ if (SvTYPE(sv) == SVt_PVAV)
+ pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
+ pl_yylval.opval);
+ else {
+ pl_yylval.opval->op_private = OPpCONST_FOLDED;
+ pl_yylval.opval->op_folded = 1;
+ pl_yylval.opval->op_flags |= OPf_SPECIAL;
+ }
TOKEN(WORD);
}
op_free(rv2cv_op);
safe_bareword:
- if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
+ if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
+ && saw_infix_sigil) {
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Operator or semicolon missing before %c%"SVf,
- lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
- strlen(PL_tokenbuf),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ "Operator or semicolon missing before %c%"UTF8f,
+ lastchar,
+ UTF8fARG(UTF, strlen(PL_tokenbuf),
+ PL_tokenbuf));
Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
"Ambiguous use of %c resolved as operator %c",
lastchar, lastchar);
case KEY___END__: {
GV *gv;
if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
- const char *pname = "main";
- STRLEN plen = 4;
- U32 putf8 = 0;
- if (PL_tokenbuf[2] == 'D')
- {
- HV * const stash =
- PL_curstash ? PL_curstash : PL_defstash;
- pname = HvNAME_get(stash);
- plen = HvNAMELEN (stash);
- if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
- }
- gv = gv_fetchpvn_flags(
- Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
- plen+6, GV_ADD|putf8, SVt_PVIO
- );
+ HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
+ ? PL_curstash
+ : PL_defstash;
+ gv = (GV *)*hv_fetchs(stash, "DATA", 1);
+ if (!isGV(gv))
+ gv_init(gv,stash,"DATA",4,0);
GvMULTI_on(gv);
if (!GvIO(gv))
GvIOp(gv) = newIO();
goto just_a_word;
}
if (!tmp)
- Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
- SVfARG(newSVpvn_flags(PL_tokenbuf, len,
- (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
+ Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
+ UTF8fARG(UTF, len, PL_tokenbuf));
if (tmp < 0)
tmp = -tmp;
else if (tmp == KEY_require || tmp == KEY_do
&& !(t[0] == ':' && t[1] == ':')
&& !keyword(s, d-s, 0)
) {
- SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0));
Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
- "Precedence problem: open %"SVf" should be open(%"SVf")",
- SVfARG(tmpsv), SVfARG(tmpsv));
+ "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
+ UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
}
}
LOP(OP_OPEN,XTERM);
expectation attrful;
bool have_name, have_proto;
const int key = tmp;
+#ifndef PERL_MAD
SV *format_name = NULL;
+#endif
#ifdef PERL_MAD
SV *tmpwhite = 0;
#ifdef PERL_MAD
if (PL_madskills)
nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
-#endif
+#else
if (key == KEY_format)
format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
+#endif
*PL_tokenbuf = '&';
if (memchr(tmpbuf, ':', len) || key != KEY_sub
|| pad_findmy_pvn(
#else
if (format_name) {
start_force(PL_curforce);
- if (PL_madskills)
- curmad('X', newSVpvn(start,s-start));
NEXTVAL_NEXTTOKE.opval
= (OP*)newSVOP(OP_CONST,0, format_name);
NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
/* Look for a prototype */
if (*s == '(') {
- char *p;
- bool bad_proto = FALSE;
- bool in_brackets = FALSE;
- char greedy_proto = ' ';
- bool proto_after_greedy_proto = FALSE;
- bool must_be_last = FALSE;
- bool underscore = FALSE;
- bool seen_underscore = FALSE;
- const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
- STRLEN tmplen;
-
s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
if (!s)
Perl_croak(aTHX_ "Prototype not terminated");
- /* strip spaces and check for bad characters */
- d = SvPV(PL_lex_stuff, tmplen);
- tmp = 0;
- for (p = d; tmplen; tmplen--, ++p) {
- if (!isSPACE(*p)) {
- d[tmp++] = *p;
-
- if (warnillegalproto) {
- if (must_be_last)
- proto_after_greedy_proto = TRUE;
- if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
- bad_proto = TRUE;
- }
- else {
- if ( underscore ) {
- if ( !strchr(";@%", *p) )
- bad_proto = TRUE;
- underscore = FALSE;
- }
- if ( *p == '[' ) {
- in_brackets = TRUE;
- }
- else if ( *p == ']' ) {
- in_brackets = FALSE;
- }
- else if ( (*p == '@' || *p == '%') &&
- ( tmp < 2 || d[tmp-2] != '\\' ) &&
- !in_brackets ) {
- must_be_last = TRUE;
- greedy_proto = *p;
- }
- else if ( *p == '_' ) {
- underscore = seen_underscore = TRUE;
- }
- }
- }
- }
- }
- d[tmp] = '\0';
- if (proto_after_greedy_proto)
- Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
- "Prototype after '%c' for %"SVf" : %s",
- greedy_proto, SVfARG(PL_subname), d);
- if (bad_proto) {
- SV *dsv = newSVpvs_flags("", SVs_TEMP);
- Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
- "Illegal character %sin prototype for %"SVf" : %s",
- seen_underscore ? "after '_' " : "",
- SVfARG(PL_subname),
- SvUTF8(PL_lex_stuff)
- ? sv_uni_display(dsv,
- newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
- tmp,
- UNI_DISPLAY_ISPRINT)
- : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
- PERL_PV_ESCAPE_NONASCII));
- }
- SvCUR_set(PL_lex_stuff, tmp);
+ (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
have_proto = TRUE;
#ifdef PERL_MAD
FUN0(OP_WANTARRAY);
case KEY_write:
-#ifdef EBCDIC
- {
- char ctl_l[2];
- ctl_l[0] = toCTRL('L');
- ctl_l[1] = '\0';
- gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
- }
-#else
- /* Make sure $^L is defined */
- gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
-#endif
+ /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
+ * we use the same number on EBCDIC */
+ gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
UNI(OP_ENTERWRITE);
case KEY_x:
{
/* Downgraded from fatal to warning 20000522 mjd */
Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
- "Possible unintended interpolation of %"SVf" in string",
- SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
- SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
+ "Possible unintended interpolation of %"UTF8f
+ " in string",
+ UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
}
}
s++;
}
-#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
- || isCNTRL_A((U8)(d)) \
- || isDIGIT_A((U8)(d)) \
- || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
+/* \c?, \c\, \c^, \c_, and \cA..\cZ minus the ones that have traditionally
+ * been matched by \s on ASCII platforms, are the legal control char names
+ * here, that is \c? plus 1-32 minus the \s ones. */
+#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
+ || isDIGIT_A((U8)(d)) \
+ || (!(u) && !isASCII((U8)(d))) \
+ || ((((U8)(d)) < 32) \
+ && (((((U8)(d)) >= 14) \
+ || (((U8)(d)) <= 8 && (d) != 0) \
+ || (((U8)(d)) == 13)))) \
+ || (((U8)(d)) == toCTRL('?')))
if (s < send
&& (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
{
/* shared is only null if we have gone beyond the outermost
lexing scope. In a file, we will have broken out of the
loop in the previous iteration. In an eval, the string buf-
- fer ends with "\n;", so the while condition below will have
+ fer ends with "\n;", so the while condition above will have
evaluated to false. So shared can never be null. */
assert(shared);
/* A LEXSHARED struct with a null ls_prev pointer is the outer-
}
CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
- lex_grow_linestr(SvCUR(PL_linestr) + 2);
+ s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
+ /* ^That should be enough to avoid this needing to grow: */
sv_catpvs(PL_linestr, "\n\0");
+ assert(s == SvPVX(PL_linestr));
+ PL_bufend = SvEND(PL_linestr);
}
s = PL_bufptr;
#ifdef PERL_MAD
/* scan_str
- takes: start position in buffer
- keep_quoted preserve \ on the embedded delimiter(s)
- keep_delims preserve the delimiters around the string
- re_reparse compiling a run-time /(?{})/:
- collapse // to /, and skip encoding src
+ takes:
+ start position in buffer
+ keep_quoted preserve \ on the embedded delimiter(s)
+ keep_delims preserve the delimiters around the string
+ re_reparse compiling a run-time /(?{})/:
+ collapse // to /, and skip encoding src
+ deprecate_escaped_meta issue a deprecation warning for cer-
+ tain paired metacharacters that appear
+ escaped within it
returns: position to continue reading from buffer
side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
updates the read buffer.
STATIC char *
S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
- bool deprecate_escaped_meta /* Should we issue a deprecation warning
- for certain paired metacharacters that
- appear escaped within it */
+ bool deprecate_escaped_meta
)
{
dVAR;
* happen for <>, as they aren't metas. */
if (deprecate_escaped_meta
&& (PL_multi_open == PL_multi_close
- || ! ckWARN_d(WARN_DEPRECATED)
- || PL_multi_open == '<'))
+ || PL_multi_open == '<'
+ || ! ckWARN_d(WARN_DEPRECATED)))
{
deprecate_escaped_meta = FALSE;
}
int offset = s - SvPVX_const(PL_linestr);
const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
&offset, (char*)termstr, termlen);
- const char * const ns = SvPVX_const(PL_linestr) + offset;
- char * const svlast = SvEND(sv) - 1;
+ const char *ns;
+ char *svlast;
+
+ if (SvIsCOW(PL_linestr)) {
+ STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
+ STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
+ STRLEN last_lop_pos, re_eval_start_pos, s_pos;
+ char *buf = SvPVX(PL_linestr);
+ bufend_pos = PL_parser->bufend - buf;
+ bufptr_pos = PL_parser->bufptr - buf;
+ oldbufptr_pos = PL_parser->oldbufptr - buf;
+ oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
+ linestart_pos = PL_parser->linestart - buf;
+ last_uni_pos = PL_parser->last_uni
+ ? PL_parser->last_uni - buf
+ : 0;
+ last_lop_pos = PL_parser->last_lop
+ ? PL_parser->last_lop - buf
+ : 0;
+ re_eval_start_pos =
+ PL_parser->lex_shared->re_eval_start ?
+ PL_parser->lex_shared->re_eval_start - buf : 0;
+ s_pos = s - buf;
+
+ sv_force_normal(PL_linestr);
+
+ buf = SvPVX(PL_linestr);
+ PL_parser->bufend = buf + bufend_pos;
+ PL_parser->bufptr = buf + bufptr_pos;
+ PL_parser->oldbufptr = buf + oldbufptr_pos;
+ PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
+ PL_parser->linestart = buf + linestart_pos;
+ if (PL_parser->last_uni)
+ PL_parser->last_uni = buf + last_uni_pos;
+ if (PL_parser->last_lop)
+ PL_parser->last_lop = buf + last_lop_pos;
+ if (PL_parser->lex_shared->re_eval_start)
+ PL_parser->lex_shared->re_eval_start =
+ buf + re_eval_start_pos;
+ s = buf + s_pos;
+ }
+ ns = SvPVX_const(PL_linestr) + offset;
+ svlast = SvEND(sv) - 1;
for (; s < ns; s++) {
if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
* context where the delimiter is also a metacharacter,
* the backslash is useless, and deprecated. () and []
* are meta in any context. {} are meta only when
- * appearing in a quantifier or in things like '\p{'.
- * They also aren't meta unless there is a matching
- * closed, escaped char later on within the string.
- * If 's' points to an open, set a flag; if to a close,
- * test that flag, and raise a warning if it was set */
+ * appearing in a quantifier or in things like '\p{'
+ * (but '\\p{' isn't meta). They also aren't meta
+ * unless there is a matching closed, escaped char
+ * later on within the string. If 's' points to an
+ * open, set a flag; if to a close, test that flag, and
+ * raise a warning if it was set */
if (deprecate_escaped_meta) {
if (*s == PL_multi_open) {
if (*s != '{') {
escaped_open = s;
}
- else if (regcurly(s,
- TRUE /* Look for a closing
- '\}' */)
- || (s - start > 2 /* Look for e.g.
- '\x{' */
- && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
- {
+ /* Look for a closing '\}' */
+ else if (regcurly(s, TRUE)) {
escaped_open = s;
}
+ /* Look for e.g. '\x{' */
+ else if (s - start > 2
+ && _generic_isCC(*(s-2),
+ _CC_BACKSLASH_FOO_LBRACE_IS_META))
+ { /* Exclude '\\x', '\\\\x', etc. */
+ char *lookbehind = s - 4;
+ bool is_meta = TRUE;
+ while (lookbehind >= start
+ && *lookbehind == '\\')
+ {
+ is_meta = ! is_meta;
+ lookbehind--;
+ }
+ if (is_meta) {
+ escaped_open = s;
+ }
+ }
}
else if (escaped_open) {
Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
if (context)
- Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
- SVfARG(newSVpvn_flags(context, contlen,
- SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+ Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
+ UTF8fARG(UTF, contlen, context));
else
Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
#endif
}
break;
- case 0xEF:
- if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
- if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
- s += 3; /* UTF-8 */
- }
- break;
+ case BOM_UTF8_FIRST_BYTE: {
+ const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
+ if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
+ s += len + 1; /* UTF-8 */
+ }
+ break;
+ }
case 0:
if (slen > 3) {
if (s[1] == 0) {
#endif
}
}
-#ifdef EBCDIC
- case 0xDD:
- if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
- if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
- s += 4; /* UTF-8 */
- }
- break;
-#endif
default:
if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
/* Append native character for the rev point */
tmpend = uvchr_to_utf8(tmpbuf, rev);
sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
- if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
+ if (!NATIVE_IS_INVARIANT(rev))
SvUTF8_on(sv);
if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
s = ++pos;