[perl #112826] Make Tie::StdHandle::READ respect offset

[perl5.git] / toke.c
diff --git a/toke.c b/toke.c

index 0570a15..df73b88 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -2862,11 +2862,23 @@ S_scan_const(pTHX_ char *start)
  
         /* if we get here, we're not doing a transliteration */
  
-       else if (in_charclass && *s == ']' && ! (s>start+1 && s[-1] == '\\'))
-           in_charclass = FALSE;
+       else if (*s == '[' && PL_lex_inpat && !in_charclass) {
+           char *s1 = s-1;
+           int esc = 0;
+           while (s1 >= start && *s1-- == '\\')
+               esc = !esc;
+           if (!esc)
+               in_charclass = TRUE;
+       }
  
-       else if (PL_lex_inpat && *s == '[')
-           in_charclass = TRUE;
+       else if (*s == ']' && PL_lex_inpat &&  in_charclass) {
+           char *s1 = s-1;
+           int esc = 0;
+           while (s1 >= start && *s1-- == '\\')
+               esc = !esc;
+           if (!esc)
+               in_charclass = FALSE;
+       }
  
         /* skip for regexp comments /(?#comment)/, except for the last
          * char, which will be done separately.
@@ -2979,7 +2991,7 @@ S_scan_const(pTHX_ char *start)
                 /* FALL THROUGH */
             default:
                 {
-                   if ((isALPHA(*s) || isDIGIT(*s)))
+                   if ((isALNUMC(*s)))
                         Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
                                        "Unrecognized escape \\%c passed through",
                                        *s);
@@ -3015,29 +3027,16 @@ S_scan_const(pTHX_ char *start)
  
             /* eg. \x24 indicates the hex constant 0x24 */
             case 'x':
-               ++s;
-               if (*s == '{') {
-                   char* const e = strchr(s, '}');
-                    I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
-                      PERL_SCAN_DISALLOW_PREFIX;
+               {
                     STRLEN len;
+                   const char* error;
  
-                    ++s;
-                   if (!e) {
-                       yyerror("Missing right brace on \\x{}");
+                   bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
+                   s += len;
+                   if (! valid) {
+                       yyerror(error);
                         continue;
                     }
-                    len = e - s;
-                   uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
-                   s = e + 1;
-               }
-               else {
-                   {
-                       STRLEN len = 2;
-                        I32 flags = PERL_SCAN_DISALLOW_PREFIX;
-                       uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
-                       s += len;
-                   }
                 }
  
               NUM_ESCAPE_INSERT:
@@ -5535,7 +5534,7 @@ Perl_yylex(pTHX)
                 }
                 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
                 if (*d == '(') {
-                   d = scan_str(d,TRUE,TRUE);
+                   d = scan_str(d,TRUE,TRUE,FALSE);
                     if (!d) {
                         /* MUST advance bufptr here to avoid bogus
                            "at end of line" context messages from yyerror().
@@ -6431,7 +6430,7 @@ Perl_yylex(pTHX)
         TERM(THING);
  
      case '\'':
-       s = scan_str(s,!!PL_madskills,FALSE);
+       s = scan_str(s,!!PL_madskills,FALSE,FALSE);
         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
         if (PL_expect == XOPERATOR) {
             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
@@ -6446,7 +6445,7 @@ Perl_yylex(pTHX)
         TERM(sublex_start());
  
      case '"':
-       s = scan_str(s,!!PL_madskills,FALSE);
+       s = scan_str(s,!!PL_madskills,FALSE,FALSE);
         DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
         if (PL_expect == XOPERATOR) {
             if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
@@ -6469,7 +6468,7 @@ Perl_yylex(pTHX)
         TERM(sublex_start());
  
      case '`':
-       s = scan_str(s,!!PL_madskills,FALSE);
+       s = scan_str(s,!!PL_madskills,FALSE,FALSE);
         DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
         if (PL_expect == XOPERATOR)
             no_op("Backticks",s);
@@ -7835,7 +7834,7 @@ Perl_yylex(pTHX)
             LOP(OP_PIPE_OP,XTERM);
  
         case KEY_q:
-           s = scan_str(s,!!PL_madskills,FALSE);
+           s = scan_str(s,!!PL_madskills,FALSE,FALSE);
             if (!s)
                 missingterm(NULL);
             pl_yylval.ival = OP_CONST;
@@ -7846,7 +7845,7 @@ Perl_yylex(pTHX)
  
         case KEY_qw: {
             OP *words = NULL;
-           s = scan_str(s,!!PL_madskills,FALSE);
+           s = scan_str(s,!!PL_madskills,FALSE,FALSE);
             if (!s)
                 missingterm(NULL);
             PL_expect = XOPERATOR;
@@ -7896,7 +7895,7 @@ Perl_yylex(pTHX)
         }
  
         case KEY_qq:
-           s = scan_str(s,!!PL_madskills,FALSE);
+           s = scan_str(s,!!PL_madskills,FALSE,FALSE);
             if (!s)
                 missingterm(NULL);
             pl_yylval.ival = OP_STRINGIFY;
@@ -7909,7 +7908,7 @@ Perl_yylex(pTHX)
             TERM(sublex_start());
  
         case KEY_qx:
-           s = scan_str(s,!!PL_madskills,FALSE);
+           s = scan_str(s,!!PL_madskills,FALSE,FALSE);
             if (!s)
                 missingterm(NULL);
             readpipe_override();
@@ -8218,7 +8217,7 @@ Perl_yylex(pTHX)
                     const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
                      STRLEN tmplen;
  
-                   s = scan_str(s,!!PL_madskills,FALSE);
+                   s = scan_str(s,!!PL_madskills,FALSE,FALSE);
                     if (!s)
                         Perl_croak(aTHX_ "Prototype not terminated");
                     /* strip spaces and check for bad characters */
@@ -9049,19 +9048,23 @@ S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charse
      /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
       * the parse starting at 's', based on the subset that are valid in this
       * context input to this routine in 'valid_flags'. Advances s.  Returns
-     * TRUE if the input was a valid flag, so the next char may be as well;
-     * otherwise FALSE. 'charset' should point to a NUL upon first call on the
-     * current regex.  This routine will set it to any charset modifier found.
-     * The caller shouldn't change it.  This way, another charset modifier
-     * encountered in the parse can be detected as an error, as we have decided
-     * to allow only one */
+     * TRUE if the input should be treated as a valid flag, so the next char
+     * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
+     * first call on the current regex.  This routine will set it to any
+     * charset modifier found.  The caller shouldn't change it.  This way,
+     * another charset modifier encountered in the parse can be detected as an
+     * error, as we have decided to allow only one */
  
      const char c = **s;
-
-    if (! strchr(valid_flags, c)) {
-        if (isALNUM(c)) {
-           yyerror(Perl_form(aTHX_ "Unknown regexp modifier \"/%c\"", c));
-            (*s)++;
+    STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
+
+    if ( charlen != 1 || ! strchr(valid_flags, c) ) {
+        if (isALNUM_lazy_if(*s, UTF)) {
+            yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", charlen, *s),
+                       UTF ? SVf_UTF8 : 0);
+            (*s) += charlen;
+            /* Pretend that it worked, so will continue processing before
+             * dieing */
              return TRUE;
          }
          return FALSE;
@@ -9140,7 +9143,7 @@ S_scan_pat(pTHX_ char *start, I32 type)
  {
      dVAR;
      PMOP *pm;
-    char *s = scan_str(start,!!PL_madskills,FALSE);
+    char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
      const char * const valid_flags =
         (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
      char charset = '\0';    /* character set modifier */
@@ -9150,6 +9153,9 @@ S_scan_pat(pTHX_ char *start, I32 type)
  
      PERL_ARGS_ASSERT_SCAN_PAT;
  
+    /* this was only needed for the initial scan_str; set it to false
+     * so that any (?{}) code blocks etc are parsed normally */
+    PL_reg_state.re_reparsing = FALSE;
      if (!s) {
         const char * const delimiter = skipspace(start);
         Perl_croak(aTHX_
@@ -9240,7 +9246,7 @@ S_scan_subst(pTHX_ char *start)
  
      pl_yylval.ival = OP_NULL;
  
-    s = scan_str(start,!!PL_madskills,FALSE);
+    s = scan_str(start,!!PL_madskills,FALSE,FALSE);
  
      if (!s)
         Perl_croak(aTHX_ "Substitution pattern not terminated");
@@ -9258,7 +9264,7 @@ S_scan_subst(pTHX_ char *start)
  #endif
  
      first_start = PL_multi_start;
-    s = scan_str(s,!!PL_madskills,FALSE);
+    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
      if (!s) {
         if (PL_lex_stuff) {
             SvREFCNT_dec(PL_lex_stuff);
@@ -9348,7 +9354,7 @@ S_scan_trans(pTHX_ char *start)
  
      pl_yylval.ival = OP_NULL;
  
-    s = scan_str(start,!!PL_madskills,FALSE);
+    s = scan_str(start,!!PL_madskills,FALSE,FALSE);
      if (!s)
         Perl_croak(aTHX_ "Transliteration pattern not terminated");
  
@@ -9364,7 +9370,7 @@ S_scan_trans(pTHX_ char *start)
      }
  #endif
  
-    s = scan_str(s,!!PL_madskills,FALSE);
+    s = scan_str(s,!!PL_madskills,FALSE,FALSE);
      if (!s) {
         if (PL_lex_stuff) {
             SvREFCNT_dec(PL_lex_stuff);
@@ -9756,7 +9762,7 @@ S_scan_inputsymbol(pTHX_ char *start)
  
      if (d - PL_tokenbuf != len) {
         pl_yylval.ival = OP_GLOB;
-       s = scan_str(start,!!PL_madskills,FALSE);
+       s = scan_str(start,!!PL_madskills,FALSE,FALSE);
         if (!s)
            Perl_croak(aTHX_ "Glob not terminated");
         return s;
@@ -9856,6 +9862,8 @@ intro_sym:
     takes: start position in buffer
           keep_quoted preserve \ on the embedded delimiter(s)
           keep_delims preserve the delimiters around the string
+         re_reparse  compiling a run-time /(?{})/:
+                       collapse // to /,  and skip encoding src
     returns: position to continue reading from buffer
     side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
         updates the read buffer.
@@ -9896,7 +9904,7 @@ intro_sym:
  */
  
  STATIC char *
-S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
+S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
  {
      dVAR;
      SV *sv;                            /* scalar value: string */
@@ -9975,7 +9983,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
      }
  #endif
      for (;;) {
-       if (PL_encoding && !UTF) {
+       if (PL_encoding && !UTF && !re_reparse) {
             bool cont = TRUE;
  
             while (cont) {
@@ -10057,9 +10065,12 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
                     CopLINE_inc(PL_curcop);
                 /* handle quoted delimiters */
                 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
-                   if (!keep_quoted && s[1] == term)
+                   if (!keep_quoted
+                       && (s[1] == term
+                           || (re_reparse && s[1] == '\\'))
+                   )
                         s++;
-               /* any other quotes are simply copied straight through */
+                   /* any other quotes are simply copied straight through */
                     else
                         *to++ = *s++;
                 }
@@ -10160,7 +10171,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
  
      /* at this point, we have successfully read the delimited string */
  
-    if (!PL_encoding || UTF) {
+    if (!PL_encoding || UTF || re_reparse) {
  #ifdef PERL_MAD
         if (PL_madskills) {
             char * const tstart = SvPVX(PL_linestr) + stuffstart;
@@ -10192,7 +10203,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
         }
      }
  #endif
-    if (has_utf8 || PL_encoding)
+    if (has_utf8 || (PL_encoding && !re_reparse))
         SvUTF8_on(sv);
  
      PL_multi_end = CopLINE(PL_curcop);