Change was_lvalue_sub back to X; spell out the only use of it

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index e55b4b3..ef215a5 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -667,11 +667,15 @@ code in I<line> comes first and must consist of complete lines of input,
  and I<rsfp> supplies the remainder of the source.
  
  The I<flags> parameter is reserved for future use, and must always
-be zero.
+be zero, except for one flag that is currently reserved for perl's internal
+use.
  
  =cut
  */
  
+/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
+   can share filters with the current parser. */
+
  void
  Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
  {
@@ -679,7 +683,7 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
      const char *s = NULL;
      STRLEN len;
      yy_parser *parser, *oparser;
-    if (flags)
+    if (flags && flags != LEX_START_SAME_FILTER)
         Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
  
      /* create and initialise a parser */
@@ -708,7 +712,10 @@ Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
      parser->lex_state = LEX_NORMAL;
      parser->expect = XSTATE;
      parser->rsfp = rsfp;
-    parser->rsfp_filters = newAV();
+    parser->rsfp_filters =
+      !(flags & LEX_START_SAME_FILTER) || !oparser
+        ? newAV()
+        : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
  
      Newx(parser->lex_brackstack, 120, char);
      Newx(parser->lex_casestack, 12, char);
@@ -3140,12 +3147,22 @@ S_scan_const(pTHX_ char *start)
  
                     if (PL_lex_inpat) {
  
-                       /* Pass through to the regex compiler unchanged.  The
-                        * reason we evaluated the number above is to make sure
-                        * there wasn't a syntax error. */
+                       /* On non-EBCDIC platforms, pass through to the regex
+                        * compiler unchanged.  The reason we evaluated the
+                        * number above is to make sure there wasn't a syntax
+                        * error.  But on EBCDIC we convert to native so
+                        * downstream code can continue to assume it's native
+                        */
                         s -= 5;     /* Include the '\N{U+' */
+#ifdef EBCDIC
+                       d += my_snprintf(d, e - s + 1 + 1,  /* includes the }
+                                                              and the \0 */
+                                   "\\N{U+%X}",
+                                   (unsigned int) UNI_TO_NATIVE(uv));
+#else
                         Copy(s, d, e - s + 1, char);    /* 1 = include the } */
                         d += e - s + 1;
+#endif
                     }
                     else {  /* Not a pattern: convert the hex to string */
  
@@ -3239,10 +3256,13 @@ S_scan_const(pTHX_ char *start)
                             }
  
                             /* Convert first code point to hex, including the
-                            * boiler plate before it */
+                            * boiler plate before it.  For all these, we
+                            * convert to native format so that downstream code
+                            * can continue to assume the input is native */
                             output_length =
                                 my_snprintf(hex_string, sizeof(hex_string),
-                                           "\\N{U+%X", (unsigned int) uv);
+                                           "\\N{U+%X",
+                                           (unsigned int) UNI_TO_NATIVE(uv));
  
                             /* Make sure there is enough space to hold it */
                             d = off + SvGROW(sv, off
@@ -3267,7 +3287,8 @@ S_scan_const(pTHX_ char *start)
  
                                 output_length =
                                     my_snprintf(hex_string, sizeof(hex_string),
-                                               ".%X", (unsigned int) uv);
+                                           ".%X",
+                                           (unsigned int) UNI_TO_NATIVE(uv));
  
                                 d = off + SvGROW(sv, off
                                                      + output_length
@@ -4771,7 +4792,13 @@ Perl_yylex(pTHX)
                       *(U8*)s == 0xEF ||
                       *(U8*)s >= 0xFE ||
                       s[1] == 0)) {
-               bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
+               Off_t offset = (IV)PerlIO_tell(PL_rsfp);
+               bof = (offset == (Off_t)SvCUR(PL_linestr));
+#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
+               /* offset may include swallowed CR */
+               if (!bof)
+                   bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
+#endif
                 if (bof) {
                     PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
                     s = swallow_bom((U8*)s);
@@ -6583,7 +6610,7 @@ Perl_yylex(pTHX)
                     goto safe_bareword;
  
                 {
-                   OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
+                   OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
                     const_op->op_private = OPpCONST_BARE;
                     rv2cv_op = newCVREF(0, const_op);
                 }
@@ -7028,7 +7055,7 @@ Perl_yylex(pTHX)
                 s += 2;
                 d = s;
                 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
-               if (!(tmp = keyword(PL_tokenbuf, len, 0)))
+               if (!(tmp = keyword(PL_tokenbuf, len, 1)))
                     Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
                 if (tmp < 0)
                     tmp = -tmp;
@@ -7072,12 +7099,6 @@ Perl_yylex(pTHX)
             UNI(OP_CHOP);
  
         case KEY_continue:
-           /* When 'use switch' is in effect, continue has a dual
-              life as a control operator. */
-           {
-               if (!FEATURE_IS_ENABLED("switch"))
-                   PREBLOCK(CONTINUE);
-               else {
                     /* We have to disambiguate the two senses of
                       "continue". If the next token is a '{' then
                       treat it as the start of a continue block;
@@ -7088,8 +7109,6 @@ Perl_yylex(pTHX)
             PREBLOCK(CONTINUE);
                     else
                         FUN0(OP_CONTINUE);
-               }
-           }
  
         case KEY_chdir:
             /* may use HOME */
@@ -7626,7 +7645,8 @@ Perl_yylex(pTHX)
                 missingterm(NULL);
             PL_expect = XOPERATOR;
             if (SvCUR(PL_lex_stuff)) {
-               int warned = 0;
+               int warned_comma = !ckWARN(WARN_QW);
+               int warned_comment = warned_comma;
                 d = SvPV_force(PL_lex_stuff, len);
                 while (len) {
                     for (; isSPACE(*d) && len; --len, ++d)
@@ -7634,17 +7654,17 @@ Perl_yylex(pTHX)
                     if (len) {
                         SV *sv;
                         const char *b = d;
-                       if (!warned && ckWARN(WARN_QW)) {
+                       if (!warned_comma || !warned_comment) {
                             for (; !isSPACE(*d) && len; --len, ++d) {
-                               if (*d == ',') {
+                               if (!warned_comma && *d == ',') {
                                     Perl_warner(aTHX_ packWARN(WARN_QW),
                                         "Possible attempt to separate words with commas");
-                                   ++warned;
+                                   ++warned_comma;
                                 }
-                               else if (*d == '#') {
+                               else if (!warned_comment && *d == '#') {
                                     Perl_warner(aTHX_ packWARN(WARN_QW),
                                         "Possible attempt to put comments in qw() list");
-                                   ++warned;
+                                   ++warned_comment;
                                 }
                             }
                         }
@@ -8295,7 +8315,7 @@ S_pending_ident(pTHX)
                  yyerror(Perl_form(aTHX_ "No package name allowed for "
                                    "variable %s in \"our\"",
                                    PL_tokenbuf));
-            tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
+            tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
          }
          else {
              if (has_colon)
@@ -8303,7 +8323,8 @@ S_pending_ident(pTHX)
                             PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
  
              pl_yylval.opval = newOP(OP_PADANY, 0);
-            pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
+            pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
+                                                        UTF ? SVf_UTF8 : 0);
              return PRIVATEREF;
          }
      }
@@ -8322,7 +8343,8 @@ S_pending_ident(pTHX)
  
      if (!has_colon) {
         if (!PL_in_my)
-           tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
+           tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
+                                    UTF ? SVf_UTF8 : 0);
          if (tmp != NOT_IN_PAD) {
              /* might be an "our" variable" */
              if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
@@ -8774,13 +8796,17 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
  }
  
  static bool
-S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) {
+S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
  
      /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
       * the parse starting at 's', based on the subset that are valid in this
       * context input to this routine in 'valid_flags'. Advances s.  Returns
       * TRUE if the input was a valid flag, so the next char may be as well;
-     * otherwise FALSE */
+     * otherwise FALSE. 'charset' should point to a NUL upon first call on the
+     * current regex.  This routine will set it to any charset modifier found.
+     * The caller shouldn't change it.  This way, another charset modifier
+     * encountered in the parse can be detected as an error, as we have decided
+     * to allow only one */
  
      const char c = **s;
  
@@ -8828,7 +8854,11 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) {
                     "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'.  In Perl 5.16, it will be resolved the other way");
                 return FALSE;
             }
+           if (*charset) {
+               goto multiple_charsets;
+           }
             set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
+           *charset = c;
             break;
         case UNICODE_PAT_MOD:
             /* In 5.14, qr//unless and qr//until are legal but deprecated; the
@@ -8836,7 +8866,11 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) {
             if (*((*s) + 1) == 'n') {
                 goto deprecate;
             }
+           if (*charset) {
+               goto multiple_charsets;
+           }
             set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
+           *charset = c;
             break;
         case ASCII_RESTRICT_PAT_MOD:
             /* In 5.14, qr//and is legal but deprecated; the 'n' means they
@@ -8844,17 +8878,30 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) {
             if (*((*s) + 1) == 'n') {
                 goto deprecate;
             }
-           if (*((*s) + 1) == ASCII_RESTRICT_PAT_MOD) {
-               /* Doubled modifier implies more restricted */
-               set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
-               (*s)++;
+
+           if (! *charset) {
+               set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
             }
             else {
-               set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
+
+               /* Error if previous modifier wasn't an 'a', but if it was, see
+                * if, and accept, a second occurrence (only) */
+               if (*charset != 'a'
+                   || get_regex_charset(*pmfl)
+                       != REGEX_ASCII_RESTRICTED_CHARSET)
+               {
+                       goto multiple_charsets;
+               }
+               set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
             }
+           *charset = c;
             break;
         case DEPENDS_PAT_MOD:
+           if (*charset) {
+               goto multiple_charsets;
+           }
             set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
+           *charset = c;
             break;
      }
  
@@ -8865,6 +8912,21 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s) {
         Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
             "Having no space between pattern and following word is deprecated");
          return FALSE;
+
+    multiple_charsets:
+       if (*charset != c) {
+           yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
+       }
+       else if (c == 'a') {
+           yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
+       }
+       else {
+           yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
+       }
+
+       /* Pretend that it worked, so that we continue processing before dying */
+       (*s)++;
+       return TRUE;
  }
  
  STATIC char *
@@ -8875,6 +8937,7 @@ S_scan_pat(pTHX_ char *start, I32 type)
      char *s = scan_str(start,!!PL_madskills,FALSE);
      const char * const valid_flags =
         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
+    char charset = '\0';    /* character set modifier */
  #ifdef PERL_MAD
      char *modstart;
  #endif
@@ -8916,7 +8979,7 @@ S_scan_pat(pTHX_ char *start, I32 type)
  #ifdef PERL_MAD
      modstart = s;
  #endif
-    while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s)) {};
+    while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
  #ifdef PERL_MAD
      if (PL_madskills && modstart != s) {
         SV* tmptoken = newSVpvn(modstart, s - modstart);
@@ -8943,6 +9006,7 @@ S_scan_subst(pTHX_ char *start)
      register PMOP *pm;
      I32 first_start;
      I32 es = 0;
+    char charset = '\0';    /* character set modifier */
  #ifdef PERL_MAD
      char *modstart;
  #endif
@@ -8995,7 +9059,8 @@ S_scan_subst(pTHX_ char *start)
             s++;
             es++;
         }
-       else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s)) {
+       else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
+       {
             break;
         }
      }
@@ -9373,6 +9438,7 @@ S_scan_heredoc(pTHX_ register char *s)
         if (*s == term && memEQ(s,PL_tokenbuf,len)) {
             STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
             *(SvPVX(PL_linestr) + off ) = ' ';
+           lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
             sv_catsv(PL_linestr,herewas);
             PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
             s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
@@ -9498,7 +9564,7 @@ S_scan_inputsymbol(pTHX_ char *start)
             /* try to find it in the pad for this block, otherwise find
                add symbol table ops
             */
-           const PADOFFSET tmp = pad_findmy(d, len, 0);
+           const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
             if (tmp != NOT_IN_PAD) {
                 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
                     HV * const stash = PAD_COMPNAME_OURSTASH(tmp);