toke.c: White-space only

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index c46b9e8..5477662 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -41,8 +41,8 @@ Individual members of C<PL_parser> have their own documentation.
  #include "dquote_inline.h"
  #include "invlist_inline.h"
  
-#define new_constant(a,b,c,d,e,f,g)    \
-       S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
+#define new_constant(a,b,c,d,e,f,g, h) \
+       S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
  
  #define pl_yylval      (PL_parser->yylval)
  
@@ -340,7 +340,7 @@ static struct debug_tokens {
      { GIVEN,           TOKENTYPE_IVAL,         "GIVEN" },
      { HASHBRACK,       TOKENTYPE_NONE,         "HASHBRACK" },
      { IF,              TOKENTYPE_IVAL,         "IF" },
-    { LABEL,           TOKENTYPE_PVAL,         "LABEL" },
+    { LABEL,           TOKENTYPE_OPVAL,        "LABEL" },
      { LOCAL,           TOKENTYPE_IVAL,         "LOCAL" },
      { LOOPEX,          TOKENTYPE_OPNUM,        "LOOPEX" },
      { LSTOP,           TOKENTYPE_OPNUM,        "LSTOP" },
@@ -1022,13 +1022,9 @@ Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
         if (flags & LEX_STUFF_UTF8) {
             goto plain_copy;
         } else {
-           STRLEN highhalf = 0;    /* Count of variants */
-           const char *p, *e = pv+len;
-           for (p = pv; p != e; p++) {
-               if (! UTF8_IS_INVARIANT(*p)) {
-                    highhalf++;
-                }
-            }
+           STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
+                                                       (U8 *) pv + len);
+            const char *p, *e = pv+len;;
             if (!highhalf)
                 goto plain_copy;
             lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
@@ -2332,7 +2328,7 @@ S_tokeq(pTHX_ SV *sv)
      SvCUR_set(sv, d - SvPVX_const(sv));
    finish:
      if ( PL_hints & HINT_NEW_STRING )
-       return new_constant(NULL, 0, "q", sv, pv, "q", 1);
+       return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
      return sv;
  }
  
@@ -2591,34 +2587,69 @@ S_sublex_done(pTHX)
  }
  
  STATIC SV*
-S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
+S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
+{
+    /* This justs wraps get_and_check_backslash_N_name() to output any error
+     * message it returns. */
+
+    const char * error_msg = NULL;
+    SV * result;
+
+    PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
+
+    /* charnames doesn't work well if there have been errors found */
+    if (PL_error_count > 0) {
+       return NULL;
+    }
+
+    result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
+
+    if (error_msg) {
+        yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
+    }
+
+    return result;
+}
+
+SV*
+Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
+                                          const char* const e,
+                                          const bool is_utf8,
+                                          const char ** error_msg)
  {
      /* <s> points to first character of interior of \N{}, <e> to one beyond the
       * interior, hence to the "}".  Finds what the name resolves to, returning
-     * an SV* containing it; NULL if no valid one found */
-
-    dVAR;
-    SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
+     * an SV* containing it; NULL if no valid one found.
+     *
+     * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
+     * doesn't have to be. */
  
+    SV* res;
      HV * table;
      SV **cvp;
      SV *cv;
      SV *rv;
      HV *stash;
      const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
+    dVAR;
  
      PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
  
+    assert(e >= s);
+    assert(s > (char *) 3);
+
+    res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
+
      if (!SvCUR(res)) {
          SvREFCNT_dec_NN(res);
          /* diag_listed_as: Unknown charname '%s' */
-        yyerror("Unknown charname ''");
+        *error_msg = Perl_form(aTHX_ "Unknown charname ''");
          return NULL;
      }
  
      res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
                          /* include the <}> */
-                        e - backslash_ptr + 1);
+                        e - backslash_ptr + 1, error_msg);
      if (! SvPOK(res)) {
          SvREFCNT_dec_NN(res);
          return NULL;
@@ -2647,7 +2678,7 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
       * characters that begin a character name alias are alphabetic, otherwise
       * would have to create a isCHARNAME_BEGIN macro */
  
-    if (! UTF) {
+    if (! is_utf8) {
          if (! isALPHAU(*s)) {
              goto bad_charname;
          }
@@ -2721,18 +2752,15 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
          /* diag_listed_as: charnames alias definitions may not contain
                             trailing white-space; marked by <-- HERE in %s
           */
-        yyerror_pv(
-            Perl_form(aTHX_
+        *error_msg = Perl_form(aTHX_
              "charnames alias definitions may not contain trailing "
              "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
              (int)(s - backslash_ptr + 1), backslash_ptr,
-            (int)(e - s + 1), s + 1
-            ),
-        UTF ? SVf_UTF8 : 0);
+            (int)(e - s + 1), s + 1);
          return NULL;
      }
  
-    if (SvUTF8(res)) { /* Don't accept malformed input */
+    if (SvUTF8(res)) { /* Don't accept malformed charname value */
          const U8* first_bad_char_loc;
          STRLEN len;
          const char* const str = SvPV_const(res, len);
@@ -2745,13 +2773,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                                                0 /* 0 means don't die */ );
              /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
                                 immediately after '%s' */
-            yyerror_pv(
-              Perl_form(aTHX_
+            *error_msg = Perl_form(aTHX_
                  "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
                   (int) (e - backslash_ptr + 1), backslash_ptr,
-                 (int) ((char *) first_bad_char_loc - str), str
-              ),
-              SVf_UTF8);
+                 (int) ((char *) first_bad_char_loc - str), str);
              return NULL;
          }
      }
@@ -2764,13 +2789,10 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
           * that this print won't run off the end of the string */
          /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
                             in \N{%s} */
-        yyerror_pv(
-          Perl_form(aTHX_
+        *error_msg = Perl_form(aTHX_
              "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
              (int)(s - backslash_ptr + 1), backslash_ptr,
-            (int)(e - s + 1), s + 1
-          ),
-          UTF ? SVf_UTF8 : 0);
+            (int)(e - s + 1), s + 1);
          return NULL;
      }
  
@@ -2778,14 +2800,11 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
          /* diag_listed_as: charnames alias definitions may not contain a
                             sequence of multiple spaces; marked by <-- HERE
                             in %s */
-        yyerror_pv(
-          Perl_form(aTHX_
+        *error_msg = Perl_form(aTHX_
              "charnames alias definitions may not contain a sequence of "
              "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
              (int)(s - backslash_ptr + 1), backslash_ptr,
-            (int)(e - s + 1), s + 1
-          ),
-          UTF ? SVf_UTF8 : 0);
+            (int)(e - s + 1), s + 1);
          return NULL;
  }
  
@@ -2890,8 +2909,8 @@ S_scan_const(pTHX_ char *start)
      bool dorange = FALSE;               /* are we in a translit range? */
      bool didrange = FALSE;              /* did we just finish a range? */
      bool in_charclass = FALSE;          /* within /[...]/ */
-    bool has_utf8 = FALSE;              /* Output constant is UTF8 */
-    bool  this_utf8 = cBOOL(UTF);       /* Is the source string assumed to be
+    bool d_is_utf8 = FALSE;             /* Output constant is UTF8 */
+    bool s_is_utf8 = cBOOL(UTF);        /* Is the source string assumed to be
                                             UTF8?  But, this can show as true
                                             when the source isn't utf8, as for
                                             example when it is entirely composed
@@ -2938,8 +2957,8 @@ S_scan_const(pTHX_ char *start)
      assert(PL_lex_inwhat != OP_TRANSR);
      if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
         /* If we are doing a trans and we know we want UTF8 set expectation */
-       has_utf8   = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
-       this_utf8  = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
+       d_is_utf8  = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
+       s_is_utf8  = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
      }
  
      /* Protect sv from errors and fatal warnings. */
@@ -2995,7 +3014,7 @@ S_scan_const(pTHX_ char *start)
                       * occurences in the constant, except those added by a
                       * backslash escape sequence, like \x{100}.  Mostly, those
                       * set 'has_above_latin1' as appropriate */
-                    if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+                    if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
                          has_above_latin1 = TRUE;
                      }
  
@@ -3020,7 +3039,7 @@ S_scan_const(pTHX_ char *start)
                       * time through the loop */
                      offset_to_max = d - SvPVX_const(sv);
  
-                    if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
+                    if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
                          has_above_latin1 = TRUE;
                      }
  
@@ -3050,7 +3069,7 @@ S_scan_const(pTHX_ char *start)
                  IV real_range_max = 0;
  #endif
                  /* Get the code point values of the range ends. */
-                if (has_utf8) {
+                if (d_is_utf8) {
                      /* We know the utf8 is valid, because we just constructed
                       * it ourselves in previous loop iterations */
                      min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
@@ -3081,7 +3100,7 @@ S_scan_const(pTHX_ char *start)
                   * get it out of the way now.) */
                  if (UNLIKELY(range_max == range_min)) {
                      d = max_ptr;
-                    if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
+                    if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
                          utf8_variant_count--;
                      }
                      goto range_done;
@@ -3155,7 +3174,7 @@ S_scan_const(pTHX_ char *start)
  
                  /* Here the range contains at least 3 code points */
  
-               if (has_utf8) {
+               if (d_is_utf8) {
  
                      /* If everything in the transliteration is below 256, we
                       * can avoid special handling later.  A translation table
@@ -3241,7 +3260,7 @@ S_scan_const(pTHX_ char *start)
                   * */
                  grow = (range_max - 1) - (range_min + 1) + 1;
  
-                if (has_utf8) {
+                if (d_is_utf8) {
  #ifdef EBCDIC
                      /* In some cases in EBCDIC, we haven't yet calculated a
                       * precise amount needed for the UTF-8 variants.  Just
@@ -3278,7 +3297,7 @@ S_scan_const(pTHX_ char *start)
                      /* Recall that the min and max are now in Unicode terms, so
                       * we have to convert each character to its native
                       * equivalent */
-                    if (has_utf8) {
+                    if (d_is_utf8) {
                          for (i = range_min; i <= range_max; i++) {
                              append_utf8_from_native_byte(
                                                      LATIN1_TO_NATIVE((U8) i),
@@ -3298,7 +3317,7 @@ S_scan_const(pTHX_ char *start)
                      /* Here, no conversions are necessary, which means that the
                       * first character in the range is already in 'd' and
                       * valid, so we can skip overwriting it */
-                    if (has_utf8) {
+                    if (d_is_utf8) {
                          SSize_t i;
                          d += UTF8SKIP(d);
                          for (i = range_min + 1; i <= range_max; i++) {
@@ -3515,7 +3534,7 @@ S_scan_const(pTHX_ char *start)
                 {
                     const char* error;
  
-                   bool valid = grok_bslash_o(&s, PL_bufend,
+                   bool valid = grok_bslash_o(&s, send,
                                                 &uv, &error,
                                                 TRUE, /* Output warning */
                                                 FALSE, /* Not strict */
@@ -3534,7 +3553,7 @@ S_scan_const(pTHX_ char *start)
                 {
                     const char* error;
  
-                   bool valid = grok_bslash_x(&s, PL_bufend,
+                   bool valid = grok_bslash_x(&s, send,
                                                 &uv, &error,
                                                 TRUE, /* Output warning */
                                                 FALSE, /* Not strict */
@@ -3555,7 +3574,7 @@ S_scan_const(pTHX_ char *start)
                     *d++ = (char) uv;
                 }
                 else {
-                   if (!has_utf8 && uv > 255) {
+                   if (!d_is_utf8 && uv > 255) {
  
                          /* Here, 'uv' won't fit unless we convert to UTF-8.
                           * If we've only seen invariants so far, all we have to
@@ -3583,10 +3602,10 @@ S_scan_const(pTHX_ char *start)
                          }
  
                          has_above_latin1 = TRUE;
-                        has_utf8 = TRUE;
+                        d_is_utf8 = TRUE;
                      }
  
-                    if (! has_utf8) {
+                    if (! d_is_utf8) {
                         *d++ = (char)uv;
                          utf8_variant_count++;
                      }
@@ -3728,7 +3747,7 @@ S_scan_const(pTHX_ char *start)
                            * tr/// doesn't care about Unicode rules, so no need
                            * there to upgrade to UTF-8 for small enough code
                            * points */
-                       if (! has_utf8 && (   uv > 0xFF
+                       if (! d_is_utf8 && (   uv > 0xFF
                                             || PL_lex_inwhat != OP_TRANS))
                          {
                             /* See Note on sizing above.  */
@@ -3750,12 +3769,12 @@ S_scan_const(pTHX_ char *start)
                                  d = SvPVX(sv) + SvCUR(sv);
                              }
  
-                           has_utf8 = TRUE;
+                           d_is_utf8 = TRUE;
                              has_above_latin1 = TRUE;
                         }
  
                          /* Add the (Unicode) code point to the output. */
-                       if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
+                       if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
                             *d++ = (char) LATIN1_TO_NATIVE(uv);
                         }
                         else {
@@ -3764,15 +3783,20 @@ S_scan_const(pTHX_ char *start)
                     }
                 }
                 else /* Here is \N{NAME} but not \N{U+...}. */
-                     if ((res = get_and_check_backslash_N_name(s, e)))
-                {
+                     if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
+                {   /* Failed.  We should die eventually, but for now use a NUL
+                       to keep parsing */
+                    *d++ = '\0';
+                }
+                else {  /* Successfully evaluated the name */
                      STRLEN len;
                      const char *str = SvPV_const(res, len);
                      if (PL_lex_inpat) {
  
                         if (! len) { /* The name resolved to an empty string */
-                           Copy("\\N{}", d, 4, char);
-                           d += 4;
+                            const char empty_N[] = "\\N{_}";
+                            Copy(empty_N, d, sizeof(empty_N) - 1, char);
+                            d += sizeof(empty_N) - 1;
                         }
                         else {
                             /* In order to not lose information for the regex
@@ -3914,7 +3938,7 @@ S_scan_const(pTHX_ char *start)
  
                           /* Upgrade destination to be utf8 if this new
                            * component is */
-                       if (! has_utf8 && SvUTF8(res)) {
+                       if (! d_is_utf8 && SvUTF8(res)) {
                             /* See Note on sizing above.  */
                              const STRLEN extra = len + (send - s) + 1;
  
@@ -3932,7 +3956,7 @@ S_scan_const(pTHX_ char *start)
                                                 extra);
                                  d = SvPVX(sv) + SvCUR(sv);
                              }
-                           has_utf8 = TRUE;
+                           d_is_utf8 = TRUE;
                         } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
  
                             /* See Note on sizing above.  (NOTE: SvCUR() is not
@@ -4008,14 +4032,14 @@ S_scan_const(pTHX_ char *start)
          if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
             *d++ = *s++;
          }
-        else if (! this_utf8 && ! has_utf8) {
+        else if (! s_is_utf8 && ! d_is_utf8) {
              /* If neither source nor output is UTF-8, is also a single byte,
               * just copy it; but this byte counts should we later have to
               * convert to UTF-8 */
             *d++ = *s++;
              utf8_variant_count++;
          }
-        else if (this_utf8 && has_utf8) {   /* Both UTF-8, can just copy */
+        else if (s_is_utf8 && d_is_utf8) {   /* Both UTF-8, can just copy */
             const STRLEN len = UTF8SKIP(s);
  
              /* We expect the source to have already been checked for
@@ -4026,43 +4050,36 @@ S_scan_const(pTHX_ char *start)
              d += len;
              s += len;
          }
-        else { /* UTF8ness matters and doesn't match, need to convert */
-           STRLEN len = 1;
-           const UV nextuv   = (this_utf8)
-                                ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
-                                : (UV) ((U8) *s);
-           STRLEN need = UVCHR_SKIP(nextuv);
-
-           if (!has_utf8) {
-               SvCUR_set(sv, d - SvPVX_const(sv));
-               SvPOK_on(sv);
-               *d = '\0';
+        else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
+            STRLEN need = send - s + 1; /* See Note on sizing above. */
  
-                /* See Note on sizing above. */
-                need += (STRLEN)(send - s) + 1;
+            SvCUR_set(sv, d - SvPVX_const(sv));
+            SvPOK_on(sv);
+            *d = '\0';
  
-                if (utf8_variant_count == 0) {
-                    SvUTF8_on(sv);
-                    d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
-                }
-                else {
-                    sv_utf8_upgrade_flags_grow(sv,
-                                               SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-                                               need);
-                    d = SvPVX(sv) + SvCUR(sv);
-                }
-               has_utf8 = TRUE;
-           } else if (need > len) {
-               /* encoded value larger than old, may need extra space (NOTE:
-                * SvCUR() is not set correctly here).   See Note on sizing
-                * above.  */
-                const STRLEN extra = need + (send - s) + 1;
+            if (utf8_variant_count == 0) {
+                SvUTF8_on(sv);
+                d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
+            }
+            else {
+                sv_utf8_upgrade_flags_grow(sv,
+                                           SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
+                                           need);
+                d = SvPVX(sv) + SvCUR(sv);
+            }
+            d_is_utf8 = TRUE;
+            goto default_action; /* Redo, having upgraded so both are UTF-8 */
+        }
+        else {  /* UTF8ness matters: convert this non-UTF8 source char to
+                   UTF-8 for output.  It will occupy 2 bytes */
+            if (d + 2 >= SvEND(sv)) {
+                const STRLEN extra = 2 + (send - s - 1) + 1;
                 const STRLEN off = d - SvPVX_const(sv);
                 d = off + SvGROW(sv, off + extra);
             }
-           s += len;
-
-           d = (char*)uvchr_to_utf8((U8*)d, nextuv);
+            *d++ = UTF8_EIGHT_BIT_HI(*s);
+            *d++ = UTF8_EIGHT_BIT_LO(*s);
+            s++;
         }
      } /* while loop to process each character */
  
@@ -4074,7 +4091,7 @@ S_scan_const(pTHX_ char *start)
                    " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
  
      SvPOK_on(sv);
-    if (has_utf8) {
+    if (d_is_utf8) {
         SvUTF8_on(sv);
         if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
             PL_parser->lex_sub_op->op_private |=
@@ -4118,7 +4135,7 @@ S_scan_const(pTHX_ char *start)
             }
  
             sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
-                               type, typelen);
+                               type, typelen, NULL);
         }
          pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
      }
@@ -7168,9 +7185,9 @@ Perl_yylex(pTHX)
         if (!anydelim && PL_expect == XSTATE
               && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
             s = d + 1;
-           pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
-           pl_yylval.pval[len] = '\0';
-           pl_yylval.pval[len+1] = UTF ? 1 : 0;
+            pl_yylval.opval =
+                newSVOP(OP_CONST, 0,
+                    newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
             CLINE;
             TOKEN(LABEL);
         }
@@ -9176,11 +9193,15 @@ S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
     Best used as sv=new_constant(..., sv, ...).
     If s, pv are NULL, calls subroutine with one argument,
     and <type> is used with error messages only.
-   <type> is assumed to be well formed UTF-8 */
+   <type> is assumed to be well formed UTF-8.
+
+   If error_msg is not NULL, *error_msg will be set to any error encountered.
+   Otherwise yyerror() will be used to output it */
  
  STATIC SV *
  S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
-              SV *sv, SV *pv, const char *type, STRLEN typelen)
+              SV *sv, SV *pv, const char *type, STRLEN typelen,
+               const char ** error_msg)
  {
      dSP;
      HV * table = GvHV(PL_hintgv);               /* ^H */
@@ -9195,13 +9216,6 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
      if (*key == 'c') { assert (strEQ(key, "charnames")); }
      assert(type || s);
  
-    /* charnames doesn't work well if there have been errors found */
-    if (PL_error_count > 0 && *key == 'c')
-    {
-       SvREFCNT_dec_NN(sv);
-       return &PL_sv_undef;
-    }
-
      sv_2mortal(sv);                    /* Parent created it permanently */
      if (!table
         || ! (PL_hints & HINT_LOCALIZE_HH)
@@ -9256,7 +9270,12 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
                                      (type ? type: s), why1, why2, why3);
              }
          }
-       yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+        if (error_msg) {
+            *error_msg = msg;
+        }
+        else {
+            yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
+        }
         return SvREFCNT_inc_simple_NN(sv);
      }
    now_ok:
@@ -10000,12 +10019,15 @@ S_scan_heredoc(pTHX_ char *s)
      e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
      *PL_tokenbuf = '\n';
      peek = s;
+
      if (*peek == '~') {
         indented = TRUE;
         peek++; s++;
      }
+
      while (SPACE_OR_TAB(*peek))
         peek++;
+
      if (*peek == '`' || *peek == '\'' || *peek =='"') {
         s = peek;
         term = *s++;
@@ -10021,19 +10043,25 @@ S_scan_heredoc(pTHX_ char *s)
             s++, term = '\'';
         else
             term = '"';
+
         if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
             Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
+
         peek = s;
+
          while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
             peek += UTF ? UTF8SKIP(peek) : 1;
         }
+
         len = (peek - s >= e - d) ? (e - d) : (peek - s);
         Copy(s, d, len, char);
         s += len;
         d += len;
      }
+
      if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
         Perl_croak(aTHX_ "Delimiter for here document is too long");
+
      *d++ = '\n';
      *d = '\0';
      len = d - PL_tokenbuf;
@@ -10076,6 +10104,7 @@ S_scan_heredoc(pTHX_ char *s)
  
      PL_multi_start = origline + 1 + PL_parser->herelines;
      PL_multi_open = PL_multi_close = '<';
+
      /* inside a string eval or quote-like operator */
      if (!infile || PL_lex_inwhat) {
         SV *linestr;
@@ -10086,43 +10115,47 @@ S_scan_heredoc(pTHX_ char *s)
            entered.  But we need them set here. */
         shared->ls_bufptr  = s;
         shared->ls_linestr = PL_linestr;
-       if (PL_lex_inwhat)
-         /* Look for a newline.  If the current buffer does not have one,
-            peek into the line buffer of the parent lexing scope, going
-            up as many levels as necessary to find one with a newline
-            after bufptr.
-          */
-         while (!(s = (char *)memchr(
-                   (void *)shared->ls_bufptr, '\n',
-                   SvEND(shared->ls_linestr)-shared->ls_bufptr
-               ))) {
-           shared = shared->ls_prev;
-           /* shared is only null if we have gone beyond the outermost
-              lexing scope.  In a file, we will have broken out of the
-              loop in the previous iteration.  In an eval, the string buf-
-              fer ends with "\n;", so the while condition above will have
-              evaluated to false.  So shared can never be null.  Or so you
-              might think.  Odd syntax errors like s;@{<<; can gobble up
-              the implicit semicolon at the end of a flie, causing the
-              file handle to be closed even when we are not in a string
-              eval.  So shared may be null in that case.
-               (Closing '}' here to balance the earlier open brace for
-               editors that look for matched pairs.) */
-           if (UNLIKELY(!shared))
-               goto interminable;
-           /* A LEXSHARED struct with a null ls_prev pointer is the outer-
-              most lexing scope.  In a file, shared->ls_linestr at that
-              level is just one line, so there is no body to steal. */
-           if (infile && !shared->ls_prev) {
-               s = olds;
-               goto streaming;
-           }
-         }
+
+        if (PL_lex_inwhat) {
+            /* Look for a newline.  If the current buffer does not have one,
+             peek into the line buffer of the parent lexing scope, going
+             up as many levels as necessary to find one with a newline
+             after bufptr.
+            */
+           while (!(s = (char *)memchr(
+                                (void *)shared->ls_bufptr, '\n',
+                                SvEND(shared->ls_linestr)-shared->ls_bufptr
+               )))
+            {
+                shared = shared->ls_prev;
+                /* shared is only null if we have gone beyond the outermost
+                   lexing scope.  In a file, we will have broken out of the
+                   loop in the previous iteration.  In an eval, the string buf-
+                   fer ends with "\n;", so the while condition above will have
+                   evaluated to false.  So shared can never be null.  Or so you
+                   might think.  Odd syntax errors like s;@{<<; can gobble up
+                   the implicit semicolon at the end of a flie, causing the
+                   file handle to be closed even when we are not in a string
+                   eval.  So shared may be null in that case.
+                   (Closing '>>}' here to balance the earlier open brace for
+                   editors that look for matched pairs.) */
+                if (UNLIKELY(!shared))
+                    goto interminable;
+                /* A LEXSHARED struct with a null ls_prev pointer is the outer-
+                   most lexing scope.  In a file, shared->ls_linestr at that
+                   level is just one line, so there is no body to steal. */
+                if (infile && !shared->ls_prev) {
+                    s = olds;
+                    goto streaming;
+                }
+            }
+        }
         else {  /* eval or we've already hit EOF */
             s = (char*)memchr((void*)s, '\n', PL_bufend - s);
             if (!s)
                  goto interminable;
         }
+
         linestr = shared->ls_linestr;
         bufend = SvEND(linestr);
         d = s;
@@ -10142,7 +10175,6 @@ S_scan_heredoc(pTHX_ char *s)
                         if (! SPACE_OR_TAB(*backup)) {
                             break;
                         }
-
                         indent_len++;
                     }
  
@@ -10157,7 +10189,8 @@ S_scan_heredoc(pTHX_ char *s)
                     }
                 }
             }
-       } else {
+       }
+        else {
             while (s < bufend - len + 1
                    && memNE(s,PL_tokenbuf,len) )
             {
@@ -10169,6 +10202,7 @@ S_scan_heredoc(pTHX_ char *s)
         if (s >= bufend - len + 1) {
             goto interminable;
         }
+
         sv_setpvn(tmpstr,d+1,s-d);
         s += len - 1;
         /* the preceding stmt passes a newline */
@@ -10191,6 +10225,7 @@ S_scan_heredoc(pTHX_ char *s)
                                 bufend - shared->re_eval_start);
             shared->re_eval_start -= s-d;
         }
+
         if (cxstack_ix >= 0
              && CxTYPE(cx) == CXt_EVAL
              && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
@@ -10199,126 +10234,139 @@ S_scan_heredoc(pTHX_ char *s)
             cx->blk_eval.cur_text = newSVsv(linestr);
             cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
         }
+
         /* Copy everything from s onwards back to d. */
         Move(s,d,bufend-s + 1,char);
         SvCUR_set(linestr, SvCUR(linestr) - (s-d));
         /* Setting PL_bufend only applies when we have not dug deeper
            into other scopes, because sublex_done sets PL_bufend to
            SvEND(PL_linestr). */
-       if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
+       if (shared == PL_parser->lex_shared)
+            PL_bufend = SvEND(linestr);
         s = olds;
      }
-    else
-    {
-      SV *linestr_save;
-      char *oldbufptr_save;
-      char *oldoldbufptr_save;
-     streaming:
-      SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
-      term = PL_tokenbuf[1];
-      len--;
-      linestr_save = PL_linestr; /* must restore this afterwards */
-      d = s;                    /* and this */
-      oldbufptr_save = PL_oldbufptr;
-      oldoldbufptr_save = PL_oldoldbufptr;
-      PL_linestr = newSVpvs("");
-      PL_bufend = SvPVX(PL_linestr);
-      while (1) {
-       PL_bufptr = PL_bufend;
-       CopLINE_set(PL_curcop,
-                   origline + 1 + PL_parser->herelines);
-       if (!lex_next_chunk(LEX_NO_TERM)
-        && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
-           /* Simply freeing linestr_save might seem simpler here, as it
-              does not matter what PL_linestr points to, since we are
-              about to croak; but in a quote-like op, linestr_save
-              will have been prospectively freed already, via
-              SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
-              restore PL_linestr. */
-           SvREFCNT_dec_NN(PL_linestr);
-           PL_linestr = linestr_save;
-            PL_oldbufptr = oldbufptr_save;
-            PL_oldoldbufptr = oldoldbufptr_save;
-           goto interminable;
-       }
-       CopLINE_set(PL_curcop, origline);
-       if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
-            s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
-            /* ^That should be enough to avoid this needing to grow:  */
-           sv_catpvs(PL_linestr, "\n\0");
-            assert(s == SvPVX(PL_linestr));
-            PL_bufend = SvEND(PL_linestr);
-       }
-       s = PL_bufptr;
-       PL_parser->herelines++;
-       PL_last_lop = PL_last_uni = NULL;
+    else {
+        SV *linestr_save;
+        char *oldbufptr_save;
+        char *oldoldbufptr_save;
+      streaming:
+        SvPVCLEAR(tmpstr);   /* avoid "uninitialized" warning */
+        term = PL_tokenbuf[1];
+        len--;
+        linestr_save = PL_linestr; /* must restore this afterwards */
+        d = s;                  /* and this */
+        oldbufptr_save = PL_oldbufptr;
+        oldoldbufptr_save = PL_oldoldbufptr;
+        PL_linestr = newSVpvs("");
+        PL_bufend = SvPVX(PL_linestr);
+
+        while (1) {
+            PL_bufptr = PL_bufend;
+            CopLINE_set(PL_curcop,
+                        origline + 1 + PL_parser->herelines);
+
+            if (   !lex_next_chunk(LEX_NO_TERM)
+                && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
+            {
+                /* Simply freeing linestr_save might seem simpler here, as it
+                   does not matter what PL_linestr points to, since we are
+                   about to croak; but in a quote-like op, linestr_save
+                   will have been prospectively freed already, via
+                   SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
+                   restore PL_linestr. */
+                SvREFCNT_dec_NN(PL_linestr);
+                PL_linestr = linestr_save;
+                PL_oldbufptr = oldbufptr_save;
+                PL_oldoldbufptr = oldoldbufptr_save;
+                goto interminable;
+            }
+
+            CopLINE_set(PL_curcop, origline);
+
+            if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
+                s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
+                /* ^That should be enough to avoid this needing to grow:  */
+                sv_catpvs(PL_linestr, "\n\0");
+                assert(s == SvPVX(PL_linestr));
+                PL_bufend = SvEND(PL_linestr);
+            }
+
+            s = PL_bufptr;
+            PL_parser->herelines++;
+            PL_last_lop = PL_last_uni = NULL;
+
  #ifndef PERL_STRICT_CR
-       if (PL_bufend - PL_linestart >= 2) {
-           if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
-                || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
-           {
-               PL_bufend[-2] = '\n';
-               PL_bufend--;
-               SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
-           }
-           else if (PL_bufend[-1] == '\r')
-               PL_bufend[-1] = '\n';
-       }
-       else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
-           PL_bufend[-1] = '\n';
+            if (PL_bufend - PL_linestart >= 2) {
+                if (   (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
+                    || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
+                {
+                    PL_bufend[-2] = '\n';
+                    PL_bufend--;
+                    SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
+                }
+                else if (PL_bufend[-1] == '\r')
+                    PL_bufend[-1] = '\n';
+            }
+            else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
+                PL_bufend[-1] = '\n';
  #endif
-       if (indented && (PL_bufend-s) >= len) {
-           char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
  
-           if (found) {
-               char *backup = found;
-               indent_len = 0;
+            if (indented && (PL_bufend-s) >= len) {
+                char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
  
-               /* Only valid if it's preceded by whitespace only */
-               while (backup != s && --backup >= s) {
-                   if (! SPACE_OR_TAB(*backup)) {
-                       break;
-                   }
-                   indent_len++;
-               }
+                if (found) {
+                    char *backup = found;
+                    indent_len = 0;
  
-               /* All whitespace or none! */
-               if (backup == found || SPACE_OR_TAB(*backup)) {
-                   Newx(indent, indent_len + 1, char);
-                   memcpy(indent, backup, indent_len);
-                   indent[indent_len] = 0;
-                   SvREFCNT_dec(PL_linestr);
-                   PL_linestr = linestr_save;
-                   PL_linestart = SvPVX(linestr_save);
-                   PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
-                   PL_oldbufptr = oldbufptr_save;
-                   PL_oldoldbufptr = oldoldbufptr_save;
-                   s = d;
-                   break;
-               }
-           }
+                    /* Only valid if it's preceded by whitespace only */
+                    while (backup != s && --backup >= s) {
+                        if (! SPACE_OR_TAB(*backup)) {
+                            break;
+                        }
+                        indent_len++;
+                    }
  
-           /* Didn't find it */
-           sv_catsv(tmpstr,PL_linestr);
-       } else {
-           if (*s == term && PL_bufend-s >= len
-               && memEQ(s,PL_tokenbuf + 1,len))
-           {
-               SvREFCNT_dec(PL_linestr);
-               PL_linestr = linestr_save;
-               PL_linestart = SvPVX(linestr_save);
-               PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
-               PL_oldbufptr = oldbufptr_save;
-               PL_oldoldbufptr = oldoldbufptr_save;
-               s = d;
-               break;
-           } else {
-               sv_catsv(tmpstr,PL_linestr);
-           }
-       }
-      }
+                    /* All whitespace or none! */
+                    if (backup == found || SPACE_OR_TAB(*backup)) {
+                        Newx(indent, indent_len + 1, char);
+                        memcpy(indent, backup, indent_len);
+                        indent[indent_len] = 0;
+                        SvREFCNT_dec(PL_linestr);
+                        PL_linestr = linestr_save;
+                        PL_linestart = SvPVX(linestr_save);
+                        PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+                        PL_oldbufptr = oldbufptr_save;
+                        PL_oldoldbufptr = oldoldbufptr_save;
+                        s = d;
+                        break;
+                    }
+                }
+
+                /* Didn't find it */
+                sv_catsv(tmpstr,PL_linestr);
+            }
+            else {
+                if (*s == term && PL_bufend-s >= len
+                    && memEQ(s,PL_tokenbuf + 1,len))
+                {
+                    SvREFCNT_dec(PL_linestr);
+                    PL_linestr = linestr_save;
+                    PL_linestart = SvPVX(linestr_save);
+                    PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+                    PL_oldbufptr = oldbufptr_save;
+                    PL_oldoldbufptr = oldoldbufptr_save;
+                    s = d;
+                    break;
+                }
+                else {
+                    sv_catsv(tmpstr,PL_linestr);
+                }
+            }
+        } /* while (1) */
      }
+
      PL_multi_end = origline + PL_parser->herelines;
+
      if (indented && indent) {
         STRLEN linecount = 1;
         STRLEN herelen = SvCUR(tmpstr);
@@ -10336,50 +10384,58 @@ S_scan_heredoc(pTHX_ char *s)
                 linecount++;
  
             /* Found our indentation? Strip it */
-           } else if (se - ss >= indent_len
+           }
+            else if (se - ss >= indent_len
                        && memEQ(ss, indent, indent_len))
             {
                 STRLEN le = 0;
-
                 ss += indent_len;
  
                 while ((ss + le) < se && *(ss + le) != '\n')
                     le++;
  
                 sv_catpvn(newstr, ss, le);
-
                 ss += le;
  
             /* Line doesn't begin with our indentation? Croak */
-           } else {
+           }
+            else {
+                Safefree(indent);
                 Perl_croak(aTHX_
                     "Indentation on line %d of here-doc doesn't match delimiter",
                     (int)linecount
                 );
             }
-       }
+       } /* while */
+
          /* avoid sv_setsv() as we dont wan't to COW here */
          sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
         Safefree(indent);
         SvREFCNT_dec_NN(newstr);
      }
+
      if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
         SvPV_shrink_to_cur(tmpstr);
      }
+
      if (!IN_BYTES) {
         if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
             SvUTF8_on(tmpstr);
      }
+
      PL_lex_stuff = tmpstr;
      pl_yylval.ival = op_type;
      return s;
  
    interminable:
+    if (indent)
+       Safefree(indent);
      SvREFCNT_dec(tmpstr);
      CopLINE_set(PL_curcop, origline);
      missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
  }
  
+
  /* scan_inputsymbol
     takes: position of first '<' in input buffer
     returns: position of first char following the matching '>' in
@@ -10603,7 +10659,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
      char term;                 /* terminating character */
      char *to;                  /* current position in the sv's data */
      I32 brackets = 1;          /* bracket nesting level */
-    bool has_utf8 = FALSE;     /* is there any utf8 content? */
+    bool d_is_utf8 = FALSE;    /* is there any utf8 content? */
      IV termcode;               /* terminating char. code */
      U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
      STRLEN termlen;            /* length of terminating string */
@@ -10718,8 +10774,8 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
                         break;
                      }
                 }
-               else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
-                   has_utf8 = TRUE;
+               else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
+                   d_is_utf8 = TRUE;
                  }
  
                 *to = *s;
@@ -10752,8 +10808,8 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
                     break;
                 else if ((UV)*s == PL_multi_open)
                     brackets++;
-               else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
-                   has_utf8 = TRUE;
+               else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
+                   d_is_utf8 = TRUE;
                 *to = *s;
             }
         }
@@ -10803,7 +10859,7 @@ S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re
             sv_catpvn(sv, s, termlen);
      s += termlen;
  
-    if (has_utf8)
+    if (d_is_utf8)
         SvUTF8_on(sv);
  
      PL_multi_end = CopLINE(PL_curcop);
@@ -11246,9 +11302,10 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             }
             if (just_zero && (PL_hints & HINT_NEW_INTEGER))
                 sv = new_constant(start, s - start, "integer",
-                                 sv, NULL, NULL, 0);
+                                 sv, NULL, NULL, 0, NULL);
             else if (PL_hints & HINT_NEW_BINARY)
-               sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
+               sv = new_constant(start, s - start, "binary",
+                                  sv, NULL, NULL, 0, NULL);
         }
         break;
  
@@ -11453,7 +11510,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
             const char *const key = floatit ? "float" : "integer";
             const STRLEN keylen = floatit ? 5 : 7;
             sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
-                               key, keylen, sv, NULL, NULL, 0);
+                               key, keylen, sv, NULL, NULL, 0, NULL);
         }
         break;
  
@@ -12513,10 +12570,11 @@ Perl_parse_label(pTHX_ U32 flags)
      if (PL_nexttoke) {
         PL_parser->yychar = yylex();
         if (PL_parser->yychar == LABEL) {
-           char * const lpv = pl_yylval.pval;
-           STRLEN llen = strlen(lpv);
+           SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
             PL_parser->yychar = YYEMPTY;
-           return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
+           cSVOPx(pl_yylval.opval)->op_sv = NULL;
+           op_free(pl_yylval.opval);
+           return labelsv;
         } else {
             yyunlex();
             goto no_label;