}
else {
t = s;
- while (!isSPACE(*t))
+ while (*t && !isSPACE(*t))
t++;
e = t;
}
if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
assert('@' == funny || '$' == funny || DOLSHARP == funny);
PL_lex_state = LEX_INTERPEND;
- force_next(POSTJOIN);
+ if ('@' == funny)
+ force_next(POSTJOIN);
}
force_next(next);
PL_bufptr+=2;
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
- if (!SvCUR(res))
+ if (!SvCUR(res)) {
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
+ "Unknown charname '' is deprecated");
return res;
+ }
if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
e - backslash_ptr,
if (*s == ' ' && *(s-1) == ' ') {
goto multi_spaces;
}
- if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
- "NO-BREAK SPACE in a charnames "
- "alias definition is deprecated");
- }
s++;
}
}
{
goto bad_charname;
}
- if (*s == *NBSP_UTF8
- && *(s+1) == *(NBSP_UTF8+1)
- && ckWARN_d(WARN_DEPRECATED))
- {
- Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
- "NO-BREAK SPACE in a charnames "
- "alias definition is deprecated");
- }
s += 2;
}
else {
}
NUM_ESCAPE_INSERT:
- /* Insert oct or hex escaped character. There will always be
- * enough room in sv since such escapes will be longer than any
- * UTF-8 sequence they can end up as, except if they force us
- * to recode the rest of the string into utf8 */
+ /* Insert oct or hex escaped character. */
/* Here uv is the ordinal of the next character being added */
if (UVCHR_IS_INVARIANT(uv)) {
}
if (has_utf8) {
+ /* Usually, there will already be enough room in 'sv'
+ * since such escapes are likely longer than any UTF-8
+ * sequence they can end up as. This isn't the case on
+ * EBCDIC where \x{40000000} contains 12 bytes, and the
+ * UTF-8 for it contains 14. And, we have to allow for
+ * a trailing NUL. It probably can't happen on ASCII
+ * platforms, but be safe */
+ const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
+ + 1;
+ if (UNLIKELY(needed > SvLEN(sv))) {
+ SvCUR_set(sv, d - SvPVX_const(sv));
+ d = sv_grow(sv, needed) + SvCUR(sv);
+ }
+
d = (char*)uvchr_to_utf8((U8*)d, uv);
if (PL_lex_inwhat == OP_TRANS
&& PL_sublex_info.sub_op)
}
/* Add the (Unicode) code point to the output. */
- if (UNI_IS_INVARIANT(uv)) {
+ if (OFFUNI_IS_INVARIANT(uv)) {
*d++ = (char) LATIN1_TO_NATIVE(uv);
}
else {
/* The regex compiler is
* expecting Unicode, not
* native */
- (U8) NATIVE_TO_LATIN1(*str));
+ NATIVE_TO_LATIN1(*str));
PERL_MY_SNPRINTF_POST_GUARD(len,
sizeof(hex_string));
Copy(hex_string, d, 3, char);
retry:
switch (*s) {
default:
- if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
+ if (UTF) {
+ if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
+ ENTER;
+ SAVESPTR(PL_warnhook);
+ PL_warnhook = PERL_WARNHOOK_FATAL;
+ utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
+ LEAVE;
+ }
+ if (isIDFIRST_utf8((U8*)s)) {
+ goto keylookup;
+ }
+ }
+ else if (isALNUMC(*s)) {
goto keylookup;
- {
+ }
+ {
SV *dsv = newSVpvs_flags("", SVs_TEMP);
const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
UTF8SKIP(s),
else
/* skip plain q word */
while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
- t += UTF8SKIP(t);
+ t += UTF ? UTF8SKIP(t) : 1;
}
else if (isWORDCHAR_lazy_if(t,UTF)) {
- t += UTF8SKIP(t);
+ t += UTF ? UTF8SKIP(t) : 1;
while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
- t += UTF8SKIP(t);
+ t += UTF ? UTF8SKIP(t) : 1;
}
while (t < PL_bufend && isSPACE(*t))
t++;
TOKEN('$');
case '@':
- if (PL_expect == XOPERATOR)
- no_op("Array", s);
- else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
+ if (PL_expect == XPOSTDEREF)
+ POSTDEREF('@');
PL_tokenbuf[0] = '@';
s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
+ if (PL_expect == XOPERATOR) {
+ d = s;
+ if (PL_bufptr > s) {
+ d = PL_bufptr-1;
+ PL_bufptr = PL_oldbufptr;
+ }
+ no_op("Array", d);
+ }
pl_yylval.ival = 0;
if (!PL_tokenbuf[1]) {
PREREF('@');
* 2) '{'
* The final case currently doesn't get this far in the program, so we
* don't test for it. If that were to change, it would be ok to allow it.
- * c) When not under Unicode rules, any upper Latin1 character
- * d) Otherwise, when unicode rules are used, all XIDS characters.
+ * b) When not under Unicode rules, any upper Latin1 character
+ * c) Otherwise, when unicode rules are used, all XIDS characters.
*
* Because all ASCII characters have the same representation whether
* encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
- * '{' without knowing if is UTF-8 or not.
- * EBCDIC already uses the rules that ASCII platforms will use after the
- * deprecation cycle; see comment below about the deprecation. */
-#ifdef EBCDIC
-# define VALID_LEN_ONE_IDENT(s, is_utf8) \
+ * '{' without knowing if is UTF-8 or not. */
+#define VALID_LEN_ONE_IDENT(s, is_utf8) \
(isGRAPH_A(*(s)) || ((is_utf8) \
? isIDFIRST_utf8((U8*) (s)) \
: (isGRAPH_L1(*s) \
&& LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
-#else
-# define VALID_LEN_ONE_IDENT(s, is_utf8) \
- (isGRAPH_A(*(s)) || ((is_utf8) \
- ? isIDFIRST_utf8((U8*) (s)) \
- : ! isASCII_utf8((U8*) (s))))
-#endif
STATIC char *
S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
: 1)
&& VALID_LEN_ONE_IDENT(s, is_utf8))
{
- /* Deprecate all non-graphic characters. Include SHY as a non-graphic,
- * because often it has no graphic representation. (We can't get to
- * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
- * test for it.) */
- if ((is_utf8)
- ? ! isGRAPH_utf8( (U8*) s)
- : (! isGRAPH_L1( (U8) *s)
- || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
- {
- deprecate("literal non-graphic characters in variable names");
- }
-
if (is_utf8) {
const STRLEN skip = UTF8SKIP(s);
STRLEN i;
"Use of /c modifier is meaningless without /g" );
}
- STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+ if (UNLIKELY((x_mod_count) > 1)) {
+ yyerror("Only one /x regex modifier is allowed");
+ }
PL_lex_op = (OP*)pm;
pl_yylval.ival = OP_MATCH;
}
}
- STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+ if (UNLIKELY((x_mod_count) > 1)) {
+ yyerror("Only one /x regex modifier is allowed");
+ }
if ((pm->op_pmflags & PMf_CONTINUE)) {
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
SV *linestr;
char *bufend;
char * const olds = s;
- PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
+ PERL_CONTEXT * const cx = CX_CUR();
/* These two fields are not set until an inner lexing scope is
entered. But we need them set here. */
shared->ls_bufptr = s;
goto streaming;
}
}
- else { /* eval */
+ else { /* eval or we've already hit EOF */
s = (char*)memchr((void*)s, '\n', PL_bufend - s);
- assert(s);
+ if (!s)
+ goto interminable;
}
linestr = shared->ls_linestr;
bufend = SvEND(linestr);
else
{
SV *linestr_save;
+ char *oldbufptr_save;
streaming:
sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
term = PL_tokenbuf[1];
len--;
linestr_save = PL_linestr; /* must restore this afterwards */
d = s; /* and this */
+ oldbufptr_save = PL_oldbufptr;
PL_linestr = newSVpvs("");
PL_bufend = SvPVX(PL_linestr);
while (1) {
restore PL_linestr. */
SvREFCNT_dec_NN(PL_linestr);
PL_linestr = linestr_save;
+ PL_oldbufptr = oldbufptr_save;
goto interminable;
}
CopLINE_set(PL_curcop, origline);
PL_linestr = linestr_save;
PL_linestart = SvPVX(linestr_save);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ PL_oldbufptr = oldbufptr_save;
s = d;
break;
}
* multiple fp operations. */
bool hexfp = FALSE;
int total_bits = 0;
+ int significant_bits = 0;
#if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
# define HEXFP_UQUAD
Uquad_t hexfp_uquad = 0;
#endif
NV hexfp_mult = 1.0;
UV high_non_zero = 0; /* highest digit */
+ int non_zero_integer_digits = 0;
PERL_ARGS_ASSERT_SCAN_NUM;
if (high_non_zero == 0 && b > 0)
high_non_zero = b;
+ if (high_non_zero)
+ non_zero_integer_digits++;
+
/* this could be hexfp, but peek ahead
* to avoid matching ".." */
if (UNLIKELY(HEXFP_PEEK(s))) {
* detection will shortly be more thorough with the
* underbar checks. */
const char* h = s;
+ significant_bits = non_zero_integer_digits * shift;
#ifdef HEXFP_UQUAD
hexfp_uquad = u;
#else /* HEXFP_NV */
hexfp_nv = u;
#endif
+ /* Ignore the leading zero bits of
+ * the high (first) non-zero digit. */
+ if (high_non_zero) {
+ if (high_non_zero < 0x8)
+ significant_bits--;
+ if (high_non_zero < 0x4)
+ significant_bits--;
+ if (high_non_zero < 0x2)
+ significant_bits--;
+ }
+
if (*h == '.') {
#ifdef HEXFP_NV
- NV mult = 1 / 16.0;
+ NV nv_mult = 1.0;
#endif
- h++;
- while (isXDIGIT(*h) || *h == '_') {
+ bool accumulate = TRUE;
+ for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
if (isXDIGIT(*h)) {
U8 b = XDIGIT_VALUE(*h);
- total_bits += shift;
+ significant_bits += shift;
#ifdef HEXFP_UQUAD
- hexfp_uquad <<= shift;
- hexfp_uquad |= b;
- hexfp_frac_bits += shift;
+ if (accumulate) {
+ if (significant_bits < NV_MANT_DIG) {
+ /* We are in the long "run" of xdigits,
+ * accumulate the full four bits. */
+ hexfp_uquad <<= shift;
+ hexfp_uquad |= b;
+ hexfp_frac_bits += shift;
+ } else {
+ /* We are at a hexdigit either at,
+ * or straddling, the edge of mantissa.
+ * We will try grabbing as many as
+ * possible bits. */
+ int tail =
+ significant_bits - NV_MANT_DIG;
+ if (tail <= 0)
+ tail += shift;
+ hexfp_uquad <<= tail;
+ hexfp_uquad |= b >> (shift - tail);
+ hexfp_frac_bits += tail;
+
+ /* Ignore the trailing zero bits
+ * of the last non-zero xdigit.
+ *
+ * The assumption here is that if
+ * one has input of e.g. the xdigit
+ * eight (0x8), there is only one
+ * bit being input, not the full
+ * four bits. Conversely, if one
+ * specifies a zero xdigit, the
+ * assumption is that one really
+ * wants all those bits to be zero. */
+ if (b) {
+ if ((b & 0x1) == 0x0) {
+ significant_bits--;
+ if ((b & 0x2) == 0x0) {
+ significant_bits--;
+ if ((b & 0x4) == 0x0) {
+ significant_bits--;
+ }
+ }
+ }
+ }
+
+ accumulate = FALSE;
+ }
+ } else {
+ /* Keep skipping the xdigits, and
+ * accumulating the significant bits,
+ * but do not shift the uquad
+ * (which would catastrophically drop
+ * high-order bits) or accumulate the
+ * xdigits anymore. */
+ }
#else /* HEXFP_NV */
- hexfp_nv += b * mult;
- mult /= 16.0;
+ if (accumulate) {
+ nv_mult /= 16.0;
+ if (nv_mult > 0.0)
+ hexfp_nv += b * nv_mult;
+ else
+ accumulate = FALSE;
+ }
#endif
}
- h++;
+ if (significant_bits >= NV_MANT_DIG)
+ accumulate = FALSE;
}
}
- if (total_bits >= 4) {
- if (high_non_zero < 0x8)
- total_bits--;
- if (high_non_zero < 0x4)
- total_bits--;
- if (high_non_zero < 0x2)
- total_bits--;
- }
-
- if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) {
+ if ((total_bits > 0 || significant_bits > 0) &&
+ isALPHA_FOLD_EQ(*h, 'p')) {
bool negexp = FALSE;
h++;
if (*h == '+')
*d = '\0';
if (UNLIKELY(hexfp)) {
# ifdef NV_MANT_DIG
- if (total_bits > NV_MANT_DIG)
+ if (significant_bits > NV_MANT_DIG)
Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
"Hexadecimal float: mantissa overflow");
# endif
"Integer overflow in decimal number");
}
}
-#ifdef EBCDIC
- if (rev > 0x7FFFFFFF)
- Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
-#endif
+
/* Append native character for the rev point */
tmpend = uvchr_to_utf8(tmpbuf, rev);
sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);