refactor pp_tied

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index 88524b4..692ab11 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -869,7 +869,7 @@ through normal scalar means.
  
  Direct pointer to the end of the chunk of text currently being lexed, the
  end of the lexer buffer.  This is equal to C<SvPVX(PL_parser-E<gt>linestr)
-+ SvCUR(PL_parser-E<gt>linestr)>.  A NUL character (zero octet) is
++ SvCUR(PL_parser-E<gt>linestr)>.  A C<NUL> character (zero octet) is
  always located at the end of the buffer, and does not count as part of
  the buffer's contents.
  
@@ -936,7 +936,7 @@ Perl_lex_bufutf8(pTHX)
  =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
  
  Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
-at least I<len> octets (including terminating NUL).  Returns a
+at least I<len> octets (including terminating C<NUL>).  Returns a
  pointer to the reallocated buffer.  This is necessary before making
  any direct modification of the buffer that would increase its length.
  L</lex_stuff_pvn> provides a more convenient way to insert text into
@@ -2865,8 +2865,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
       * look to see that the first character is legal.  Then loop through the
       * rest checking that each is a continuation */
  
-    /* This code needs to be sync'ed with a regex in _charnames.pm which does
-     * the same thing */
+    /* This code makes the reasonable assumption that the only Latin1-range
+     * characters that begin a character name alias are alphabetic, otherwise
+     * would have to create a isCHARNAME_BEGIN macro */
  
      if (! UTF) {
          if (! isALPHAU(*s)) {
@@ -2877,18 +2878,16 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
              if (! isCHARNAME_CONT(*s)) {
                  goto bad_charname;
              }
-           if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
+           if (*s == ' ' && *(s-1) == ' ') {
+                goto multi_spaces;
+            }
+           if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
                  Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                           "A sequence of multiple spaces in a charnames "
+                           "NO-BREAK SPACE in a charnames "
                             "alias definition is deprecated");
              }
              s++;
          }
-        if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
-            Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                        "Trailing white-space in a charnames alias "
-                        "definition is deprecated");
-        }
      }
      else {
          /* Similarly for utf8.  For invariants can check directly; for other
@@ -2924,11 +2923,8 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                  if (! isCHARNAME_CONT(*s)) {
                      goto bad_charname;
                  }
-                if (*s == ' ' && *(s-1) == ' '
-                 && ckWARN_d(WARN_DEPRECATED)) {
-                    Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                               "A sequence of multiple spaces in a charnam"
-                               "es alias definition is deprecated");
+                if (*s == ' ' && *(s-1) == ' ') {
+                    goto multi_spaces;
                  }
                  s++;
              }
@@ -2937,6 +2933,14 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                  {
                      goto bad_charname;
                  }
+                if (*s == *NBSP_UTF8
+                    && *(s+1) == *(NBSP_UTF8+1)
+                    && ckWARN_d(WARN_DEPRECATED))
+                {
+                    Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
+                                "NO-BREAK SPACE in a charnames "
+                                "alias definition is deprecated");
+                }
                  s += 2;
              }
              else {
@@ -2953,11 +2957,17 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
                  s += UTF8SKIP(s);
              }
          }
-        if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
-            Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
-                       "Trailing white-space in a charnames alias "
-                       "definition is deprecated");
-        }
+    }
+    if (*(s-1) == ' ') {
+        yyerror_pv(
+            Perl_form(aTHX_
+            "charnames alias definitions may not contain trailing "
+            "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
+            (int)(s - backslash_ptr + 1), backslash_ptr,
+            (int)(e - s + 1), s + 1
+            ),
+        UTF ? SVf_UTF8 : 0);
+        return NULL;
      }
  
      if (SvUTF8(res)) { /* Don't accept malformed input */
@@ -2988,19 +2998,29 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
      return res;
  
    bad_charname: {
-        int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
  
          /* The final %.*s makes sure that should the trailing NUL be missing
           * that this print won't run off the end of the string */
          yyerror_pv(
            Perl_form(aTHX_
              "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
-            (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
-            (int)(e - s + bad_char_size), s + bad_char_size
+            (int)(s - backslash_ptr + 1), backslash_ptr,
+            (int)(e - s + 1), s + 1
            ),
            UTF ? SVf_UTF8 : 0);
          return NULL;
      }
+
+  multi_spaces:
+        yyerror_pv(
+          Perl_form(aTHX_
+            "charnames alias definitions may not contain a sequence of "
+            "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
+            (int)(s - backslash_ptr + 1), backslash_ptr,
+            (int)(e - s + 1), s + 1
+          ),
+          UTF ? SVf_UTF8 : 0);
+        return NULL;
  }
  
  /*
@@ -3415,7 +3435,7 @@ S_scan_const(pTHX_ char *start)
                     *d++ = *s++;
                     continue;
                 }
-               /* FALL THROUGH */
+               /* FALLTHROUGH */
             default:
                 {
                     if ((isALPHANUMERIC(*s)))
@@ -3759,6 +3779,10 @@ S_scan_const(pTHX_ char *start)
                             const STRLEN off = d - SvPVX_const(sv);
                             d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
                         }
+                        if (! SvUTF8(res)) {    /* Make sure is \N{} return is UTF-8 */
+                            sv_utf8_upgrade(res);
+                            str = SvPV_const(res, len);
+                        }
                         Copy(str, d, len, char);
                         d += len;
                     }
@@ -5032,7 +5056,7 @@ Perl_yylex(pTHX)
             PL_lex_state = LEX_INTERPNORMAL;    /* false alarm, more expr */
             break;
         }
-       /* FALL THROUGH */
+       /* FALLTHROUGH */
  
      case LEX_INTERPEND:
         if (PL_lex_dojoin) {
@@ -6133,7 +6157,7 @@ Perl_yylex(pTHX)
                         force_next('-');
                 }
             }
-           /* FALL THROUGH */
+           /* FALLTHROUGH */
         case XATTRBLOCK:
         case XBLOCK:
             PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
@@ -6747,6 +6771,7 @@ Perl_yylex(pTHX)
             s += 2;
             AOPERATOR(DORDOR);
         }
+       /* FALLTHROUGH */
       case '?':                 /* may either be conditional or pattern */
         if (PL_expect == XOPERATOR) {
              char tmp = *s++;
@@ -6837,7 +6862,7 @@ Perl_yylex(pTHX)
             }
             Aop(OP_CONCAT);
         }
-       /* FALL THROUGH */
+       /* FALLTHROUGH */
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
         s = scan_num(s, &pl_yylval);
@@ -6848,6 +6873,8 @@ Perl_yylex(pTHX)
  
      case '\'':
         s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
+       if (!s)
+           missingterm(NULL);
         COPLINE_SET_FROM_MULTI_END;
         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
         if (PL_expect == XOPERATOR) {
@@ -6857,8 +6884,6 @@ Perl_yylex(pTHX)
             else
                 no_op("String",s);
         }
-       if (!s)
-           missingterm(NULL);
         pl_yylval.ival = OP_CONST;
         TERM(sublex_start());
  
@@ -8385,9 +8410,9 @@ Perl_yylex(pTHX)
  
         case KEY_q:
             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
-           COPLINE_SET_FROM_MULTI_END;
             if (!s)
                 missingterm(NULL);
+           COPLINE_SET_FROM_MULTI_END;
             pl_yylval.ival = OP_CONST;
             TERM(sublex_start());
  
@@ -8397,9 +8422,9 @@ Perl_yylex(pTHX)
         case KEY_qw: {
             OP *words = NULL;
             s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
-           COPLINE_SET_FROM_MULTI_END;
             if (!s)
                 missingterm(NULL);
+           COPLINE_SET_FROM_MULTI_END;
             PL_expect = XOPERATOR;
             if (SvCUR(PL_lex_stuff)) {
                 int warned_comma = !ckWARN(WARN_QW);
@@ -8484,7 +8509,7 @@ Perl_yylex(pTHX)
                     gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
                                  GV_ADD | (UTF ? SVf_UTF8 : 0));
                 else if (*s == '<')
-                   yyerror("<> should be quotes");
+                   yyerror("<> at require-statement should be quotes");
             }
             if (orig_keyword == KEY_require) {
                 orig_keyword = 0;
@@ -10398,7 +10423,6 @@ S_scan_inputsymbol(pTHX_ char *start)
             Copy("ARGV",d,5,char);
  
         /* Check whether readline() is overriden */
-       gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
         if ((gv_readline = gv_override("readline",8)))
             readline_overriden = TRUE;
  
@@ -11087,14 +11111,14 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
                 case '8': case '9':
                     if (shift == 3)
                         yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
-                   /* FALL THROUGH */
+                   /* FALLTHROUGH */
  
                 /* octal digits */
                 case '2': case '3': case '4':
                 case '5': case '6': case '7':
                     if (shift == 1)
                         yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
-                   /* FALL THROUGH */
+                   /* FALLTHROUGH */
  
                 case '0': case '1':
                     b = *s++ & 15;              /* ASCII digit -> value of digit */
@@ -11722,6 +11746,7 @@ S_swallow_bom(pTHX_ U8 *s)
  #endif
              }
         }
+        break;
  
      default:
          if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
@@ -12420,7 +12445,7 @@ S_parse_opt_lexvar(pTHX)
         return NULL;
      s = PL_bufptr;
      d = PL_tokenbuf + 1;
-    PL_tokenbuf[0] = sigil;
+    PL_tokenbuf[0] = (char)sigil;
      parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
      PL_bufptr = s;
      if (d == PL_tokenbuf+1)