Rewrite csh_glob in C; fix two quoting bugs

author Father Chrysostomos <sprout@cpan.org>

Tue, 25 Oct 2011 22:40:40 +0000 (15:40 -0700)

committer Father Chrysostomos <sprout@cpan.org>

Thu, 27 Oct 2011 01:22:18 +0000 (18:22 -0700)
author Father Chrysostomos <sprout@cpan.org>
Tue, 25 Oct 2011 22:40:40 +0000 (15:40 -0700)
committer Father Chrysostomos <sprout@cpan.org>
Thu, 27 Oct 2011 01:22:18 +0000 (18:22 -0700)
diff --git a/ext/File-Glob/Glob.pm b/ext/File-Glob/Glob.pm

index f9f0edd..a665816 100644 (file)
--- a/ext/File-Glob/Glob.pm
+++ b/ext/File-Glob/Glob.pm
@@ -70,67 +70,6 @@ sub glob {
      goto &bsd_glob;
  }
  
      goto &bsd_glob;
  }
  
-## borrowed heavily from gsar's File::DosGlob
-my %iter;
-my %entries;
-
-sub csh_glob {
-    my $pat = shift;
-    my $cxix = shift;
-
-    # assume global context if not provided one
-    $cxix = '_G_' unless defined $cxix;
-    $iter{$cxix} = 0 unless exists $iter{$cxix};
-
-    # if we're just beginning, do it all first
-    if ($iter{$cxix} == 0) {
-       my @pat;
-
-       # glob without args defaults to $_
-       $pat = $_ unless defined $pat;
-
-       # extract patterns
-       $pat =~ s/^\s+//;       # Protect against empty elements in
-                               # things like < *.c>, which alone
-                               # shouldn't trigger ParseWords.  Patterns
-                               # with a trailing space must be passed
-                               # to ParseWords, in case it is escaped,
-                               # as in glob('\ ').
-       if ($pat =~ /[\s"']/) {
-           # XXX this is needed for compatibility with the csh
-           # implementation in Perl.  Need to support a flag
-           # to disable this behavior.
-           require Text::ParseWords;
-           for (@pat = Text::ParseWords::parse_line('\s+',1,$pat)) {
-               s/^['"]// and chop;
-           }
-       }
-       if (@pat) {
-           $entries{$cxix} = [ map { doglob($_, $DEFAULT_FLAGS) } @pat ];
-       }
-       else {
-           $entries{$cxix} = [ doglob($pat, $DEFAULT_FLAGS) ];
-       }
-    }
-
-    # chuck it all out, quick or slow
-    if (wantarray) {
-        delete $iter{$cxix};
-        return @{delete $entries{$cxix}};
-    }
-    else {
-        if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
-            return shift @{$entries{$cxix}};
-        }
-        else {
-            # return undef for EOL
-            delete $iter{$cxix};
-            delete $entries{$cxix};
-            return undef;
-        }
-    }
-}
-
  1;
  __END__
  
  1;
  __END__
  
diff --git a/ext/File-Glob/Glob.xs b/ext/File-Glob/Glob.xs

index 91b9f4d..2a9fbb0 100644 (file)
--- a/ext/File-Glob/Glob.xs
+++ b/ext/File-Glob/Glob.xs
@@ -10,6 +10,8 @@
  
  typedef struct {
      int                x_GLOB_ERROR;
  
  typedef struct {
      int                x_GLOB_ERROR;
+    HV *       x_GLOB_ITER;
+    HV *       x_GLOB_ENTRIES;
  } my_cxt_t;
  
  START_MY_CXT
  } my_cxt_t;
  
  START_MY_CXT
@@ -28,6 +30,236 @@ errfunc(const char *foo, int bar) {
  }
  #endif
  
  }
  #endif
  
+static void
+doglob(pTHX_ const char *pattern, int flags)
+{
+    dSP;
+    glob_t pglob;
+    int i;
+    int retval;
+    SV *tmp;
+    {
+       dMY_CXT;
+
+       /* call glob */
+       memset(&pglob, 0, sizeof(glob_t));
+       retval = bsd_glob(pattern, flags, errfunc, &pglob);
+       GLOB_ERROR = retval;
+
+       /* return any matches found */
+       EXTEND(sp, pglob.gl_pathc);
+       for (i = 0; i < pglob.gl_pathc; i++) {
+           /* printf("# bsd_glob: %s\n", pglob.gl_pathv[i]); */
+           tmp = newSVpvn_flags(pglob.gl_pathv[i], strlen(pglob.gl_pathv[i]),
+                                SVs_TEMP);
+           TAINT;
+           SvTAINT(tmp);
+           PUSHs(tmp);
+       }
+       PUTBACK;
+
+       bsd_globfree(&pglob);
+    }
+}
+
+/* borrowed heavily from gsar's File::DosGlob, but translated into C */
+static void
+csh_glob(pTHX)
+{
+    dSP;
+    dMY_CXT;
+
+    SV *cxixsv = POPs;
+    const char *cxixpv;
+    STRLEN cxixlen;
+    STRLEN len;
+    const char *s = NULL;
+    SV *itersv;
+    SV *entriesv;
+    AV *entries = NULL;
+    U32 gimme = GIMME_V;
+    SV *patsv = POPs;
+
+    /* assume global context if not provided one */
+    SvGETMAGIC(cxixsv);
+    if (SvOK(cxixsv)) cxixpv = SvPV_nomg(cxixsv, cxixlen);
+    else cxixpv = "_G_", cxixlen = 3;
+
+    if (!MY_CXT.x_GLOB_ITER) MY_CXT.x_GLOB_ITER = newHV();
+    itersv = *(hv_fetch(MY_CXT.x_GLOB_ITER, cxixpv, cxixlen, 1));
+    if (!SvOK(itersv)) sv_setiv(itersv,0);
+
+    if (!MY_CXT.x_GLOB_ENTRIES) MY_CXT.x_GLOB_ENTRIES = newHV();
+    entriesv = *(hv_fetch(MY_CXT.x_GLOB_ENTRIES, cxixpv, cxixlen, 1));
+
+    /* if we're just beginning, do it all first */
+    if (!SvIV(itersv)) {
+       const char *pat;
+       AV *patav = NULL;
+       const char *patend;
+       const char *piece = NULL;
+       SV *word = NULL;
+       int const flags =
+           (int)SvIV(get_sv("File::Glob::DEFAULT_FLAGS", GV_ADD));
+       bool is_utf8;
+
+       /* glob without args defaults to $_ */
+       SvGETMAGIC(patsv);
+       if (
+           !SvOK(patsv)
+        && (patsv = DEFSV, SvGETMAGIC(patsv), !SvOK(patsv))
+       )
+            pat = "", len = 0, is_utf8 = 0;
+       else pat = SvPV_nomg(patsv,len), is_utf8 = !!SvUTF8(patsv);
+       patend = pat + len;
+
+       /* extract patterns */
+       /* XXX this is needed for compatibility with the csh
+        * implementation in Perl.  Need to support a flag
+        * to disable this behavior.
+        */
+       s = pat-1;
+       while (++s < patend) {
+           switch (*s) {
+           case '\'':
+           case '"' :
+             {
+               bool found = FALSE;
+               if (!word) {
+                   word = newSVpvs("");
+                   if (is_utf8) SvUTF8_on(word);
+               }
+               if (piece) sv_catpvn(word, piece, s-piece);
+               piece = s+1;
+               while (++s <= patend)
+                   if (*s == '\\') s++;
+                   else if (*s == *(piece-1)) {
+                       sv_catpvn(word, piece, s-piece);
+                       piece = NULL;
+                       found = TRUE;
+                       break;
+                   }
+               if (!found) { /* unmatched quote */
+                   /* Give up on tokenisation and treat the whole string
+                      as a single token, but with whitespace stripped. */
+                   piece = pat;
+                   while (isSPACE(*pat)) pat++;
+                   while (isSPACE(*(patend-1))) patend--;
+                   /* bsd_glob expects a trailing null, but we cannot mod-
+                      ify the original */
+                   if (patend < SvEND(patsv)) {
+                       if (word) sv_setpvn(word, pat, patend-pat);
+                       else
+                           word = newSVpvn_flags(
+                               pat, patend-pat, SVf_UTF8*is_utf8
+                           );
+                       piece = NULL;
+                   }
+                   else {
+                       if (word) SvREFCNT_dec(word), word=NULL;
+                       piece = pat;
+                       s = patend;
+                   }
+                   goto end_of_parsing;
+               }
+               break;
+             }
+           case '\\': if (!piece) piece = s; s++; break;
+           default:
+               if (isSPACE(*s)) {
+                   if (piece) {
+                       if (!word) {
+                           word = newSVpvn(piece,s-piece);
+                           if (is_utf8) SvUTF8_on(word);
+                       }
+                       else sv_catpvn(word, piece, s-piece);
+                   }
+                   if (!word) break;
+                   if (!patav) patav = (AV *)sv_2mortal((SV *)newAV());
+                   av_push(patav, word);
+                   word = NULL;
+                   piece = NULL;
+               }
+               else if (!piece) piece = s;
+               break;
+           }
+       }
+      end_of_parsing:
+
+       assert(!SvROK(entriesv));
+       entries = (AV *)newSVrv(entriesv,NULL);
+       sv_upgrade((SV *)entries, SVt_PVAV);
+       
+       if (patav) {
+           I32 items = AvFILLp(patav) + 1;
+           SV **svp = AvARRAY(patav);
+           while (items--) {
+               PUSHMARK(SP);
+               PUTBACK;
+               doglob(aTHX_ SvPVXx(*svp++), flags);
+               SPAGAIN;
+               {
+                   dMARK;
+                   dORIGMARK;
+                   while (++MARK <= SP)
+                       av_push(entries, SvREFCNT_inc_simple_NN(*MARK));
+                   SP = ORIGMARK;
+               }
+           }
+       }
+       /* piece is set at this point if there is no trailing whitespace.
+          It is the beginning of the last token or quote-delimited
+          piece thereof.  word is set at this point if the last token has
+          multiple quoted pieces. */
+       if (piece || word) {
+           if (word) {
+               if (piece) sv_catpvn(word, piece, s-piece);
+               piece = SvPVX(word);
+           }
+           PUSHMARK(SP);
+           PUTBACK;
+           doglob(aTHX_ piece, flags);
+           if (word) SvREFCNT_dec(word);
+           SPAGAIN;
+           {
+               dMARK;
+               dORIGMARK;
+               while (++MARK <= SP)
+                   av_push(entries, SvREFCNT_inc_simple_NN(*MARK));
+               /* short-circuit here for a fairly common case */
+               if (!patav && gimme == G_ARRAY) goto return_list;
+
+               SP = ORIGMARK;
+           }
+       }
+    }
+
+    /* chuck it all out, quick or slow */
+    assert(SvROK(entriesv));
+    if (!entries) entries = (AV *)SvRV(entriesv);
+    if (gimme == G_ARRAY) {
+       Copy(AvARRAY(entries), SP+1, AvFILLp(entries)+1, SV *);
+       SP += AvFILLp(entries)+1;
+      return_list:
+       hv_delete(MY_CXT.x_GLOB_ITER, cxixpv, cxixlen, G_DISCARD);
+       /* No G_DISCARD here!  It will free the stack items. */
+       hv_delete(MY_CXT.x_GLOB_ENTRIES, cxixpv, cxixlen, 0);
+    }
+    else {
+       if (AvFILLp(entries) + 1) {
+           sv_setiv(itersv, AvFILLp(entries) + 1);
+           mPUSHs(av_shift(entries));
+       }
+       else {
+           /* return undef for EOL */
+           hv_delete(MY_CXT.x_GLOB_ITER, cxixpv, cxixlen, G_DISCARD);
+           hv_delete(MY_CXT.x_GLOB_ENTRIES, cxixpv, cxixlen, G_DISCARD);
+           PUSHs(&PL_sv_undef);
+       }
+    }
+    PUTBACK;
+}
+
  MODULE = File::Glob            PACKAGE = File::Glob
  
  int
  MODULE = File::Glob            PACKAGE = File::Glob
  
  int
@@ -62,25 +294,28 @@ PPCODE:
         } else if (ix) {
             flags = (int) SvIV(get_sv("File::Glob::DEFAULT_FLAGS", GV_ADD));
         }
         } else if (ix) {
             flags = (int) SvIV(get_sv("File::Glob::DEFAULT_FLAGS", GV_ADD));
         }
+       
+       PUTBACK;
+       doglob(aTHX_ pattern, flags);
+       SPAGAIN;
+    }
  
  
-       /* call glob */
-       memset(&pglob, 0, sizeof(glob_t));
-       retval = bsd_glob(pattern, flags, errfunc, &pglob);
-       GLOB_ERROR = retval;
-
-       /* return any matches found */
-       EXTEND(sp, pglob.gl_pathc);
-       for (i = 0; i < pglob.gl_pathc; i++) {
-           /* printf("# bsd_glob: %s\n", pglob.gl_pathv[i]); */
-           tmp = newSVpvn_flags(pglob.gl_pathv[i], strlen(pglob.gl_pathv[i]),
-                                SVs_TEMP);
-           TAINT;
-           SvTAINT(tmp);
-           PUSHs(tmp);
-       }
-
-       bsd_globfree(&pglob);
+PROTOTYPES: DISABLE
+void
+csh_glob(...)
+PPCODE:
+    /* For backward-compatibility with the original Perl function, we sim-
+     * ply take the first two arguments, regardless of how many there are.
+     */
+    if (items >= 2) SP += 2;
+    else {
+       SP += items;
+       XPUSHs(&PL_sv_undef);
+       if (!items) XPUSHs(&PL_sv_undef);
      }
      }
+    PUTBACK;
+    csh_glob(aTHX);
+    SPAGAIN;
  
  BOOT:
  {
  
  BOOT:
  {
@@ -91,6 +326,10 @@ BOOT:
  BOOT:
  {
      MY_CXT_INIT;
  BOOT:
  {
      MY_CXT_INIT;
+    {
+       dMY_CXT;
+       MY_CXT.x_GLOB_ITER = MY_CXT.x_GLOB_ENTRIES = NULL;
+    }  
  }
  
  INCLUDE: const-xs.inc
  }
  
  INCLUDE: const-xs.inc
diff --git a/ext/File-Glob/t/basic.t b/ext/File-Glob/t/basic.t

index f7a2f1f..309e2bb 100644 (file)
--- a/ext/File-Glob/t/basic.t
+++ b/ext/File-Glob/t/basic.t
@@ -10,7 +10,7 @@ BEGIN {
      }
  }
  use strict;
      }
  }
  use strict;
-use Test::More tests => 22;
+use Test::More tests => 25;
  BEGIN {use_ok('File::Glob', ':glob')};
  use Cwd ();
  
  BEGIN {use_ok('File::Glob', ':glob')};
  use Cwd ();
  
@@ -242,6 +242,9 @@ use subs 'glob';
  BEGIN { *glob = \&File::Glob::csh_glob }
  
  is +(glob "a'b'")[0], (<a'b' c>)[0], "a'b' with and without spaces";
  BEGIN { *glob = \&File::Glob::csh_glob }
  
  is +(glob "a'b'")[0], (<a'b' c>)[0], "a'b' with and without spaces";
-is +(<a"b">)[0], (<a"b" c>)[0], 'a"b" with and without spaces';
+is <a"b">, 'ab', 'a"b" without spaces';
+is_deeply [<a"b" c>], [qw<ab c>], 'a"b" without spaces';
  is_deeply [<\\* .\\*>], [<\\*>,<.\\*>], 'backslashes with(out) spaces';
  like <\\ >, qr/^\\? \z/, 'final escaped space';
  is_deeply [<\\* .\\*>], [<\\*>,<.\\*>], 'backslashes with(out) spaces';
  like <\\ >, qr/^\\? \z/, 'final escaped space';
+is <a"b>, 'a"b', 'unmatched quote';
+is < a"b >, 'a"b', 'unmatched quote with surrounding spaces';
diff --git a/t/op/glob.t b/t/op/glob.t

index d5ddd9f..f26d7b3 100644 (file)
--- a/t/op/glob.t
+++ b/t/op/glob.t
@@ -48,14 +48,14 @@ my $i = 0;
  for (1..2) {
      eval "<.>";
      ok(!length($@),"eval'ed a glob $_");
  for (1..2) {
      eval "<.>";
      ok(!length($@),"eval'ed a glob $_");
-    undef %File::Glob::;
+    local %File::Glob::;
      ++$i;
  }
  cmp_ok($i,'==',2,'remove File::Glob stash');
  
  # a more sinister version of the same test (crashes from 5.8 to 5.13.1)
  {
      ++$i;
  }
  cmp_ok($i,'==',2,'remove File::Glob stash');
  
  # a more sinister version of the same test (crashes from 5.8 to 5.13.1)
  {
-    undef %File::Glob::;
+    local %File::Glob::;
      local %CORE::GLOBAL::;
      eval "<.>";
      ok(!length($@),"remove File::Glob stash *and* CORE::GLOBAL::glob");
      local %CORE::GLOBAL::;
      eval "<.>";
      ok(!length($@),"remove File::Glob stash *and* CORE::GLOBAL::glob");
diff --git a/t/run/fresh_perl.t b/t/run/fresh_perl.t

index a3874d9..0aaf28a 100644 (file)
--- a/t/run/fresh_perl.t
+++ b/t/run/fresh_perl.t
@@ -761,49 +761,23 @@ EXPECT
  foo at - line 1.
  ######## glob() bug Mon, 01 Sep 2003 02:25:41 -0700 <200309010925.h819Pf0X011457@smtp3.ActiveState.com>
  -lw
  foo at - line 1.
  ######## glob() bug Mon, 01 Sep 2003 02:25:41 -0700 <200309010925.h819Pf0X011457@smtp3.ActiveState.com>
  -lw
-BEGIN {
-  if ($^O eq 'os390') {
-    require File::Glob;
-    import File::Glob ':glob';
-  }
-}
-BEGIN {
-  eval 'require Fcntl';
-  if ($@) { print qq[./"TEST"\n./"TEST"\n]; exit 0 } # running minitest?
-}
-if ($^O eq 'VMS') { # VMS is not *that* kind of a glob.
-print qq[./"TEST"\n./"TEST"\n];
-} else {
-print glob(q(./"TEST"));
-use File::Glob;
-print glob(q(./"TEST"));
-}
-EXPECT
-./"TEST"
-./"TEST"
-######## glob() bug Mon, 01 Sep 2003 02:25:41 -0700 <200309010925.h819Pf0X011457@smtp3.ActiveState.com>
--lw
-BEGIN {
-  if ($^O eq 'os390') {
-    require File::Glob;
-    import File::Glob ':glob';
-  }
-}
-BEGIN {
-  eval 'require Fcntl';
-  if ($@) { print qq[./"TEST"\n./"TEST"\n]; exit 0 } # running minitest?
-}
-if ($^O eq 'VMS') { # VMS is not *that* kind of a glob.
-print qq[./"TEST"\n./"TEST"\n];
-} else {
-use File::Glob;
-print glob(q(./"TEST"));
-use File::Glob;
-print glob(q(./"TEST"));
-}
-EXPECT
-./"TEST"
-./"TEST"
+# Make sure the presence of the CORE::GLOBAL::glob typeglob does not affect
+# whether File::Glob::csh_glob is called.
+++$INC{"File/Glob.pm"}; # prevent it from loading
+my $called1 =
+my $called2 = 0;
+*File::Glob::csh_glob = sub { ++$called1 };
+my $output1 = eval q{ glob(q(./"TEST")) };
+undef *CORE::GLOBAL::glob; # but leave the typeglob itself there
+++$CORE::GLOBAL::glob if 0; # "used only once"
+undef *File::Glob::csh_glob; # avoid redefinition warnings
+*File::Glob::csh_glob = sub { ++$called2 };
+my $output2 = eval q{ glob(q(./"TEST")) };
+print "ok1" if $called1 eq $called2;
+print "ok2" if $output1 eq $output2;
+EXPECT
+ok1
+ok2
  ######## "#75146: 27e904532594b7fb (fix for #23810) introduces a #regression"
  use strict;
  
  ######## "#75146: 27e904532594b7fb (fix for #23810) introduces a #regression"
  use strict;
author	Father Chrysostomos <sprout@cpan.org>
	Tue, 25 Oct 2011 22:40:40 +0000 (15:40 -0700)
committer	Father Chrysostomos <sprout@cpan.org>
	Thu, 27 Oct 2011 01:22:18 +0000 (18:22 -0700)
ext/File-Glob/Glob.pm		patch \| blob \| blame \| history
ext/File-Glob/Glob.xs		patch \| blob \| blame \| history
ext/File-Glob/t/basic.t		patch \| blob \| blame \| history
t/op/glob.t		patch \| blob \| blame \| history
t/run/fresh_perl.t		patch \| blob \| blame \| history