X-Git-Url: https://perl5.git.perl.org/perl5.git/blobdiff_plain/0c8adad77d308bd7d386922521a57b894a2bf959..710740a6ed7f31a3bcc5f991f3cb7b55fed8a68e:/toke.c

diff --git a/toke.c b/toke.c
index 9f56573..fd359ed 100644
--- a/toke.c
+++ b/toke.c
@@ -1694,7 +1694,7 @@ S_incline(pTHX_ const char *s)
     }
     else {
 	t = s;
-	while (!isSPACE(*t))
+	while (*t && !isSPACE(*t))
 	    t++;
 	e = t;
     }
@@ -3357,10 +3357,7 @@ S_scan_const(pTHX_ char *start)
 		}
 
 	      NUM_ESCAPE_INSERT:
-		/* Insert oct or hex escaped character.  There will always be
-		 * enough room in sv since such escapes will be longer than any
-		 * UTF-8 sequence they can end up as, except if they force us
-		 * to recode the rest of the string into utf8 */
+		/* Insert oct or hex escaped character. */
 		
 		/* Here uv is the ordinal of the next character being added */
 		if (UVCHR_IS_INVARIANT(uv)) {
@@ -3388,6 +3385,20 @@ S_scan_const(pTHX_ char *start)
                     }
 
                     if (has_utf8) {
+                       /* Usually, there will already be enough room in 'sv'
+                        * since such escapes are likely longer than any UTF-8
+                        * sequence they can end up as.  This isn't the case on
+                        * EBCDIC where \x{40000000} contains 12 bytes, and the
+                        * UTF-8 for it contains 14.  And, we have to allow for
+                        * a trailing NUL.  It probably can't happen on ASCII
+                        * platforms, but be safe */
+                        const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
+                                            + 1;
+                        if (UNLIKELY(needed > SvLEN(sv))) {
+                            SvCUR_set(sv, d - SvPVX_const(sv));
+                            d = sv_grow(sv, needed) + SvCUR(sv);
+                        }
+
 		        d = (char*)uvchr_to_utf8((U8*)d, uv);
 			if (PL_lex_inwhat == OP_TRANS
                             && PL_sublex_info.sub_op)
@@ -3582,7 +3593,7 @@ S_scan_const(pTHX_ char *start)
                                                   /* The regex compiler is
                                                    * expecting Unicode, not
                                                    * native */
-                                                  (U8) NATIVE_TO_LATIN1(*str));
+                                                  NATIVE_TO_LATIN1(*str));
                                     PERL_MY_SNPRINTF_POST_GUARD(len,
                                                            sizeof(hex_string));
                                     Copy(hex_string, d, 3, char);
@@ -4807,9 +4818,22 @@ Perl_yylex(pTHX)
   retry:
     switch (*s) {
     default:
-	if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
+	if (UTF) {
+            if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
+                ENTER;
+                SAVESPTR(PL_warnhook);
+                PL_warnhook = PERL_WARNHOOK_FATAL;
+                utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
+                LEAVE;
+            }
+            if (isIDFIRST_utf8((U8*)s)) {
+                goto keylookup;
+            }
+        }
+        else if (isALNUMC(*s)) {
 	    goto keylookup;
-	{
+	}
+    {
         SV *dsv = newSVpvs_flags("", SVs_TEMP);
         const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
                                                     UTF8SKIP(s),
@@ -5845,12 +5869,12 @@ Perl_yylex(pTHX)
 		    else
 			/* skip plain q word */
 			while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
-			     t += UTF8SKIP(t);
+			    t += UTF ? UTF8SKIP(t) : 1;
 		}
 		else if (isWORDCHAR_lazy_if(t,UTF)) {
-		    t += UTF8SKIP(t);
+		    t += UTF ? UTF8SKIP(t) : 1;
 		    while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
-			 t += UTF8SKIP(t);
+			t += UTF ? UTF8SKIP(t) : 1;
 		}
 		while (t < PL_bufend && isSPACE(*t))
 		    t++;
@@ -6353,11 +6377,18 @@ Perl_yylex(pTHX)
 	TOKEN('$');
 
     case '@':
-	if (PL_expect == XOPERATOR)
-	    no_op("Array", s);
-	else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
+        if (PL_expect == XPOSTDEREF)
+            POSTDEREF('@');
 	PL_tokenbuf[0] = '@';
 	s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
+	if (PL_expect == XOPERATOR) {
+            d = s;
+            if (PL_bufptr > s) {
+                d = PL_bufptr-1;
+                PL_bufptr = PL_oldbufptr;
+            }
+	    no_op("Array", d);
+        }
 	pl_yylval.ival = 0;
 	if (!PL_tokenbuf[1]) {
 	    PREREF('@');
@@ -9524,7 +9555,7 @@ S_scan_heredoc(pTHX_ char *s)
 	SV *linestr;
 	char *bufend;
 	char * const olds = s;
-	PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
+	PERL_CONTEXT * const cx = CX_CUR();
 	/* These two fields are not set until an inner lexing scope is
 	   entered.  But we need them set here. */
 	shared->ls_bufptr  = s;
@@ -9559,9 +9590,10 @@ S_scan_heredoc(pTHX_ char *s)
 		goto streaming;
 	    }
 	  }
-	else {	/* eval */
+	else {	/* eval or we've already hit EOF */
 	    s = (char*)memchr((void*)s, '\n', PL_bufend - s);
-	    assert(s);
+	    if (!s)
+                goto interminable;
 	}
 	linestr = shared->ls_linestr;
 	bufend = SvEND(linestr);
@@ -9617,12 +9649,14 @@ S_scan_heredoc(pTHX_ char *s)
     else
     {
       SV *linestr_save;
+      char *oldbufptr_save;
      streaming:
       sv_setpvs(tmpstr,"");   /* avoid "uninitialized" warning */
       term = PL_tokenbuf[1];
       len--;
       linestr_save = PL_linestr; /* must restore this afterwards */
       d = s;			 /* and this */
+      oldbufptr_save = PL_oldbufptr;
       PL_linestr = newSVpvs("");
       PL_bufend = SvPVX(PL_linestr);
       while (1) {
@@ -9639,6 +9673,7 @@ S_scan_heredoc(pTHX_ char *s)
 	       restore PL_linestr. */
 	    SvREFCNT_dec_NN(PL_linestr);
 	    PL_linestr = linestr_save;
+            PL_oldbufptr = oldbufptr_save;
 	    goto interminable;
 	}
 	CopLINE_set(PL_curcop, origline);
@@ -9673,6 +9708,7 @@ S_scan_heredoc(pTHX_ char *s)
 	    PL_linestr = linestr_save;
 	    PL_linestart = SvPVX(linestr_save);
 	    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+            PL_oldbufptr = oldbufptr_save;
 	    s = d;
 	    break;
 	}
@@ -10273,6 +10309,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
      * multiple fp operations. */
     bool hexfp = FALSE;
     int total_bits = 0;
+    int significant_bits = 0;
 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
 #  define HEXFP_UQUAD
     Uquad_t hexfp_uquad = 0;
@@ -10283,6 +10320,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 #endif
     NV hexfp_mult = 1.0;
     UV high_non_zero = 0; /* highest digit */
+    int non_zero_integer_digits = 0;
 
     PERL_ARGS_ASSERT_SCAN_NUM;
 
@@ -10435,6 +10473,9 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                     if (high_non_zero == 0 && b > 0)
                         high_non_zero = b;
 
+                    if (high_non_zero)
+                        non_zero_integer_digits++;
+
                     /* this could be hexfp, but peek ahead
                      * to avoid matching ".." */
                     if (UNLIKELY(HEXFP_PEEK(s))) {
@@ -10461,43 +10502,103 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                  * detection will shortly be more thorough with the
                  * underbar checks. */
                 const char* h = s;
+                significant_bits = non_zero_integer_digits * shift;
 #ifdef HEXFP_UQUAD
                 hexfp_uquad = u;
 #else /* HEXFP_NV */
                 hexfp_nv = u;
 #endif
+                /* Ignore the leading zero bits of
+                 * the high (first) non-zero digit. */
+                if (high_non_zero) {
+                    if (high_non_zero < 0x8)
+                        significant_bits--;
+                    if (high_non_zero < 0x4)
+                        significant_bits--;
+                    if (high_non_zero < 0x2)
+                        significant_bits--;
+                }
+
                 if (*h == '.') {
 #ifdef HEXFP_NV
-                    NV mult = 1 / 16.0;
+                    NV nv_mult = 1.0;
 #endif
+                    bool accumulate = TRUE;
                     for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
                         if (isXDIGIT(*h)) {
                             U8 b = XDIGIT_VALUE(*h);
-                            total_bits += shift;
-                            if (total_bits < NV_MANT_DIG) {
+                            significant_bits += shift;
 #ifdef HEXFP_UQUAD
-                                hexfp_uquad <<= shift;
-                                hexfp_uquad |= b;
-                                hexfp_frac_bits += shift;
+                            if (accumulate) {
+                                if (significant_bits < NV_MANT_DIG) {
+                                    /* We are in the long "run" of xdigits,
+                                     * accumulate the full four bits. */
+                                    hexfp_uquad <<= shift;
+                                    hexfp_uquad |= b;
+                                    hexfp_frac_bits += shift;
+                                } else {
+                                    /* We are at a hexdigit either at,
+                                     * or straddling, the edge of mantissa.
+                                     * We will try grabbing as many as
+                                     * possible bits. */
+                                    int tail =
+                                      significant_bits - NV_MANT_DIG;
+                                    if (tail <= 0)
+                                       tail += shift;
+                                    hexfp_uquad <<= tail;
+                                    hexfp_uquad |= b >> (shift - tail);
+                                    hexfp_frac_bits += tail;
+
+                                    /* Ignore the trailing zero bits
+                                     * of the last non-zero xdigit.
+                                     *
+                                     * The assumption here is that if
+                                     * one has input of e.g. the xdigit
+                                     * eight (0x8), there is only one
+                                     * bit being input, not the full
+                                     * four bits.  Conversely, if one
+                                     * specifies a zero xdigit, the
+                                     * assumption is that one really
+                                     * wants all those bits to be zero. */
+                                    if (b) {
+                                        if ((b & 0x1) == 0x0) {
+                                            significant_bits--;
+                                            if ((b & 0x2) == 0x0) {
+                                                significant_bits--;
+                                                if ((b & 0x4) == 0x0) {
+                                                    significant_bits--;
+                                                }
+                                            }
+                                        }
+                                    }
+
+                                    accumulate = FALSE;
+                                }
+                            } else {
+                                /* Keep skipping the xdigits, and
+                                 * accumulating the significant bits,
+                                 * but do not shift the uquad
+                                 * (which would catastrophically drop
+                                 * high-order bits) or accumulate the
+                                 * xdigits anymore. */
+                            }
 #else /* HEXFP_NV */
-                                hexfp_nv += b * mult;
-                                mult /= 16.0;
-#endif
+                            if (accumulate) {
+                                nv_mult /= 16.0;
+                                if (nv_mult > 0.0)
+                                    hexfp_nv += b * nv_mult;
+                                else
+                                    accumulate = FALSE;
                             }
+#endif
                         }
+                        if (significant_bits >= NV_MANT_DIG)
+                            accumulate = FALSE;
                     }
                 }
 
-                if (total_bits >= 4) {
-                    if (high_non_zero < 0x8)
-                        total_bits--;
-                    if (high_non_zero < 0x4)
-                        total_bits--;
-                    if (high_non_zero < 0x2)
-                        total_bits--;
-                }
-
-                if (total_bits > 0 && (isALPHA_FOLD_EQ(*h, 'p'))) {
+                if ((total_bits > 0 || significant_bits > 0) &&
+                    isALPHA_FOLD_EQ(*h, 'p')) {
                     bool negexp = FALSE;
                     h++;
                     if (*h == '+')
@@ -10741,7 +10842,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
 	    *d = '\0';
             if (UNLIKELY(hexfp)) {
 #  ifdef NV_MANT_DIG
-                if (total_bits > NV_MANT_DIG)
+                if (significant_bits > NV_MANT_DIG)
                     Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
                                    "Hexadecimal float: mantissa overflow");
 #  endif
@@ -11352,10 +11453,7 @@ Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
 					 "Integer overflow in decimal number");
 		}
 	    }
-#ifdef EBCDIC
-	    if (rev > 0x7FFFFFFF)
-		 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
-#endif
+
 	    /* Append native character for the rev point */
 	    tmpend = uvchr_to_utf8(tmpbuf, rev);
 	    sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);