}
if (name)
Perl_sv_catpv(aTHX_ report, name);
- else if ((char)rv > ' ' && (char)rv <= '~')
+ else if (isGRAPH(rv))
{
Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
if ((char)rv == 'p')
parser->bufend = parser->bufptr + SvCUR(parser->linestr);
parser->last_lop = parser->last_uni = NULL;
- assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
+ STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
|LEX_DONT_CLOSE_RSFP));
parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
|LEX_DONT_CLOSE_RSFP));
SV * const sv = newSVpvn_utf8(start, len,
!IN_BYTES
&& UTF
- && !is_ascii_string((const U8*)start, len)
+ && !is_invariant_string((const U8*)start, len)
&& is_utf8_string((const U8*)start, len));
return sv;
}
s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
if (check_keyword) {
char *s2 = PL_tokenbuf;
+ STRLEN len2 = len;
if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
- s2 += 6, len -= 6;
- if (keyword(s2, len, 0))
+ s2 += 6, len2 -= 6;
+ if (keyword(s2, len2, 0))
return start;
}
if (token == METHOD) {
+ PL_parser->herelines;
PL_parser->herelines = 0;
}
- return ',';
+ return '/';
}
else {
const line_t l = CopLINE(PL_curcop);
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
+ if (!SvCUR(res))
+ return res;
+
if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
e - backslash_ptr,
&first_bad_char_loc))
SvPOK_on(sv);
*d = '\0';
/* See Note on sizing above. */
- sv_utf8_upgrade_flags_grow(sv,
- SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
- UNISKIP(uv) + (STRLEN)(send - s) + 1);
+ sv_utf8_upgrade_flags_grow(
+ sv,
+ SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
+ /* Above-latin1 in string
+ * implies no encoding */
+ |SV_UTF8_NO_ENCODING,
+ UNISKIP(uv) + (STRLEN)(send - s) + 1);
d = SvPVX(sv) + SvCUR(sv);
has_utf8 = TRUE;
}
if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
+ | PERL_SCAN_SILENT_ILLDIGIT
| PERL_SCAN_DISALLOW_PREFIX;
STRLEN len;
s += 2; /* Skip to next char after the 'U+' */
len = e - s;
uv = grok_hex(s, &len, &flags, NULL);
- if (len == 0 || len != (STRLEN)(e - s)) {
+ if (len == 0
+ || ( len != (STRLEN)(e - s) && s[len] != '.'
+ && PL_lex_inpat))
+ {
+ bad_NU:
yyerror("Invalid hexadecimal number in \\N{U+...}");
s = e + 1;
continue;
}
if (PL_lex_inpat) {
- s -= 5; /* Include the '\N{U+' */
#ifdef EBCDIC
+ s -= 5; /* Include the '\N{U+' */
/* On EBCDIC platforms, in \N{U+...}, the '...' is a
* Unicode value, so convert to native so downstream
* code can continue to assume it's native */
+ /* XXX This should be in the regexp parser,
+ because doing it here makes /\N{U+41}/ and
+ =~ '\N{U+41}' do different things. */
d += my_snprintf(d, e - s + 1 + 1, /* includes the '}'
and the \0 */
- "\\N{U+%X}",
+ "\\N{U+%X",
(unsigned int) UNI_TO_NATIVE(uv));
+ s += 5 + len;
+ while (*s == '.') {
+ s++;
+ len = e - s;
+ uv = grok_hex(s, &len, &flags, NULL);
+ if (!len
+ || (len != (STRLEN)(e - s) && s[len] != '.'))
+ goto bad_NU;
+ s--;
+ d += my_snprintf(
+ d, e - s + 1 + 1, ".%X",
+ (unsigned int)UNI_TO_NATIVE(uv)
+ );
+ s += len + 1;
+ }
+ *(d++) = '}';
#else
/* On non-EBCDIC platforms, pass it through unchanged.
- * The reason we evaluated the number above is to make
+ * The reason we evaluate the numbers is to make
* sure there wasn't a syntax error. */
- Copy(s, d, e - s + 1, char); /* +1 is for the '}' */
- d += e - s + 1;
+ const char * const orig_s = s - 5;
+ while (*s == '.') {
+ s++;
+ len = e - s;
+ uv = grok_hex(s, &len, &flags, NULL);
+ if (!len
+ || (len != (STRLEN)(e - s) && s[len] != '.'))
+ goto bad_NU;
+ }
+ /* +1 is for the '}' */
+ Copy(orig_s, d, e - orig_s + 1, char);
+ d += e - orig_s + 1;
#endif
}
else { /* Not a pattern: convert the hex to string */
const STRLEN off = d - SvPVX_const(sv);
d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
}
- if (! SvUTF8(res)) { /* Make sure is \N{} return is UTF-8 */
- sv_utf8_upgrade(res);
+ if (! SvUTF8(res)) { /* Make sure \N{} return is UTF-8 */
+ sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
str = SvPV_const(res, len);
}
Copy(str, d, len, char);
&& !(last_un_char == '$' || last_un_char == '@'
|| last_un_char == '&')
&& isALPHA(*s) && s[1] && isALPHA(s[1])) {
- char *d = tmpbuf;
+ char *d = s;
while (isALPHA(*s))
- *d++ = *s++;
- *d = '\0';
- if (keyword(tmpbuf, d - tmpbuf, 0))
+ s++;
+ if (keyword(d, s - d, 0))
weight -= 150;
}
if (un_char == last_un_char + 1)
Perl_croak(aTHX_
"\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
#endif
- case ' ': case '\t': case '\f': case 013:
+ case ' ': case '\t': case '\f': case '\v':
s++;
goto retry;
case '#':
sv_free(sv);
CvMETHOD_on(PL_compcv);
}
+ else if (!PL_in_my && len == 5
+ && strnEQ(SvPVX(sv), "const", len))
+ {
+ sv_free(sv);
+ Perl_ck_warner_d(aTHX_
+ packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
+ ":const is experimental"
+ );
+ CvANONCONST_on(PL_compcv);
+ if (!CvANON(PL_compcv))
+ yyerror(":const is not permitted on named "
+ "subroutines");
+ }
/* After we've set the flags, it could be argued that
we don't need to do the attributes.pm-based setting
process, and shouldn't bother appending recognized
s--;
if (PL_expect == XSTATE && isALPHA(tmp) &&
(s == PL_linestart+1 || s[-2] == '\n') )
- {
- if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
- || PL_lex_state != LEX_NORMAL) {
- d = PL_bufend;
- while (s < d) {
- if (*s++ == '\n') {
- incline(s);
- if (strnEQ(s,"=cut",4)) {
- s = strchr(s,'\n');
- if (s)
- s++;
- else
- s = d;
- incline(s);
- goto retry;
- }
- }
- }
- goto retry;
- }
- s = PL_bufend;
- PL_parser->in_pod = 1;
- goto retry;
- }
+ {
+ if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
+ || PL_lex_state != LEX_NORMAL) {
+ d = PL_bufend;
+ while (s < d) {
+ if (*s++ == '\n') {
+ incline(s);
+ if (strnEQ(s,"=cut",4)) {
+ s = strchr(s,'\n');
+ if (s)
+ s++;
+ else
+ s = d;
+ incline(s);
+ goto retry;
+ }
+ }
+ }
+ goto retry;
+ }
+ s = PL_bufend;
+ PL_parser->in_pod = 1;
+ goto retry;
+ }
}
if (PL_expect == XBLOCK) {
const char *t = s;
}
/* avoid v123abc() or $h{v1}, allow C<print v10;> */
if (!isALPHA(*start) && (PL_expect == XTERM
- || PL_expect == XSTATE
+ || PL_expect == XREF || PL_expect == XSTATE
|| PL_expect == XTERMORDORDOR)) {
GV *const gv = gv_fetchpvn_flags(s, start - s,
UTF ? SVf_UTF8 : 0, SVt_PVCV);
char tmpbuf[sizeof PL_tokenbuf + 1];
*tmpbuf = '&';
Copy(PL_tokenbuf, tmpbuf+1, len, char);
- off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
+ off = pad_findmy_pvn(tmpbuf, len+1, 0);
if (off != NOT_IN_PAD) {
assert(off); /* we assume this is boolean-true below */
if (PAD_COMPNAME_FLAGS_isOUR(off)) {
*PL_tokenbuf = '&';
if (memchr(tmpbuf, ':', len) || key != KEY_sub
|| pad_findmy_pvn(
- PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
+ PL_tokenbuf, len + 1, 0
) != NOT_IN_PAD)
sv_setpvn(PL_subname, tmpbuf, len);
else {
if (!has_colon) {
if (!PL_in_my)
tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
- UTF ? SVf_UTF8 : 0);
+ 0);
if (tmp != NOT_IN_PAD) {
/* might be an "our" variable" */
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
char tmpbuf[256];
Copy(w, tmpbuf+1, s - w, char);
*tmpbuf = '&';
- off = pad_findmy_pvn(tmpbuf, s-w+1, UTF ? SVf_UTF8 : 0);
+ off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
if (off != NOT_IN_PAD) return;
}
Perl_croak(aTHX_ "No comma allowed after %s", what);
yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
return SvREFCNT_inc_simple_NN(sv);
}
-now_ok:
+ now_ok:
cv = *cvp;
if (!pv && s)
pv = newSVpvn_flags(s, len, SVs_TEMP);
*
* Because all ASCII characters have the same representation whether
* encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
- * '{' without knowing if is UTF-8 or not */
+ * '{' without knowing if is UTF-8 or not.
+ * EBCDIC already uses the rules that ASCII platforms will use after the
+ * deprecation cycle; see comment below about the deprecation. */
#ifdef EBCDIC
# define VALID_LEN_ONE_IDENT(s, is_utf8) \
(isGRAPH_A(*(s)) || ((is_utf8) \
/* try to find it in the pad for this block, otherwise find
add symbol table ops
*/
- const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
+ const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
if (tmp != NOT_IN_PAD) {
if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
else {
GV *gv;
++d;
-intro_sym:
+ intro_sym:
gv = gv_fetchpv(d,
GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
SVt_PV);
/* if it starts with a v, it could be a v-string */
case 'v':
-vstring:
+ vstring:
sv = newSV(5); /* preallocate storage space */
ENTER_with_name("scan_vstring");
SAVEFREESV(sv);
if (needargs) {
const char *s2 = s;
while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
- || *s2 == 013)
+ || *s2 == '\v')
s2++;
if (*s2 == '{') {
PL_expect = XTERMBLOCK;
CvFLAGS(PL_compcv) |= flags;
PL_subline = CopLINE(PL_curcop);
- CvPADLIST_set(PL_compcv, pad_new(padnew_SAVE|padnew_SAVESUB));
+ CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
if (outsidecv && CvPADLIST(outsidecv))
- CvPADLIST(PL_compcv)->xpadl_outid =
- PadlistNAMES(CvPADLIST(outsidecv));
+ CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
return oldsavestack_ix;
}
}
else if (yychar > 255)
sv_catpvs(where_sv, "next token ???");
- else if (yychar == -2) { /* YYEMPTY */
+ else if (yychar == YYEMPTY) {
if (PL_lex_state == LEX_NORMAL ||
(PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
sv_catpvs(where_sv, "at end of line");