This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
DosGlob: Don’t parse an pattern that will not be used
[perl5.git] / lib / File / DosGlob.pm
CommitLineData
08aa1457
PP
1#!perl -w
2
37248846
MM
3# use strict fails
4#Can't use string ("main::glob") as a symbol ref while "strict refs" in use at /usr/lib/perl5/5.005/File/DosGlob.pm line 191.
5
08aa1457
PP
6#
7# Documentation at the __END__
8#
9
10package File::DosGlob;
11
598c6284 12our $VERSION = '1.06';
b75c8c73 13use strict;
b395063c 14use warnings;
b75c8c73 15
08aa1457
PP
16sub doglob {
17 my $cond = shift;
18 my @retval = ();
2f3c8ce9 19 my $fix_drive_relative_paths;
08aa1457
PP
20 #print "doglob: ", join('|', @_), "\n";
21 OUTER:
b75c8c73 22 for my $pat (@_) {
08aa1457
PP
23 my @matched = ();
24 my @globdirs = ();
25 my $head = '.';
26 my $sepchr = '/';
b75c8c73
MS
27 my $tail;
28 next OUTER unless defined $pat and $pat ne '';
08aa1457 29 # if arg is within quotes strip em and do no globbing
b75c8c73
MS
30 if ($pat =~ /^"(.*)"\z/s) {
31 $pat = $1;
32 if ($cond eq 'd') { push(@retval, $pat) if -d $pat }
33 else { push(@retval, $pat) if -e $pat }
08aa1457
PP
34 next OUTER;
35 }
4dd406c2
GS
36 # wildcards with a drive prefix such as h:*.pm must be changed
37 # to h:./*.pm to expand correctly
b75c8c73 38 if ($pat =~ m|^([A-Za-z]:)[^/\\]|s) {
a371bcf3 39 substr($pat,0,2) = $1 . "./";
2f3c8ce9 40 $fix_drive_relative_paths = 1;
4dd406c2 41 }
b75c8c73 42 if ($pat =~ m|^(.*)([\\/])([^\\/]*)\z|s) {
08aa1457
PP
43 ($head, $sepchr, $tail) = ($1,$2,$3);
44 #print "div: |$head|$sepchr|$tail|\n";
b75c8c73 45 push (@retval, $pat), next OUTER if $tail eq '';
08aa1457
PP
46 if ($head =~ /[*?]/) {
47 @globdirs = doglob('d', $head);
48 push(@retval, doglob($cond, map {"$_$sepchr$tail"} @globdirs)),
49 next OUTER if @globdirs;
50 }
4dd406c2 51 $head .= $sepchr if $head eq '' or $head =~ /^[A-Za-z]:\z/s;
b75c8c73 52 $pat = $tail;
08aa1457
PP
53 }
54 #
55 # If file component has no wildcards, we can avoid opendir
b75c8c73 56 unless ($pat =~ /[*?]/) {
08aa1457
PP
57 $head = '' if $head eq '.';
58 $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
b75c8c73 59 $head .= $pat;
08aa1457
PP
60 if ($cond eq 'd') { push(@retval,$head) if -d $head }
61 else { push(@retval,$head) if -e $head }
62 next OUTER;
63 }
64 opendir(D, $head) or next OUTER;
65 my @leaves = readdir D;
66 closedir D;
67 $head = '' if $head eq '.';
68 $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
69
70 # escape regex metachars but not glob chars
2f3c8ce9 71 $pat =~ s:([].+^\-\${}()[|]):\\$1:g;
08aa1457 72 # and convert DOS-style wildcards to regex
b75c8c73
MS
73 $pat =~ s/\*/.*/g;
74 $pat =~ s/\?/.?/g;
08aa1457 75
b75c8c73
MS
76 #print "regex: '$pat', head: '$head'\n";
77 my $matchsub = sub { $_[0] =~ m|^$pat\z|is };
08aa1457
PP
78 INNER:
79 for my $e (@leaves) {
80 next INNER if $e eq '.' or $e eq '..';
81 next INNER if $cond eq 'd' and ! -d "$head$e";
82 push(@matched, "$head$e"), next INNER if &$matchsub($e);
83 #
84 # [DOS compatibility special case]
85 # Failed, add a trailing dot and try again, but only
86 # if name does not have a dot in it *and* pattern
87 # has a dot *and* name is shorter than 9 chars.
88 #
89 if (index($e,'.') == -1 and length($e) < 9
b75c8c73 90 and index($pat,'\\.') != -1) {
08aa1457
PP
91 push(@matched, "$head$e"), next INNER if &$matchsub("$e.");
92 }
93 }
94 push @retval, @matched if @matched;
95 }
2f3c8ce9
AD
96 if ($fix_drive_relative_paths) {
97 s|^([A-Za-z]:)\./|$1| for @retval;
98 }
08aa1457
PP
99 return @retval;
100}
101
102#
fb73857a
PP
103# this can be used to override CORE::glob in a specific
104# package by saying C<use File::DosGlob 'glob';> in that
105# namespace.
08aa1457 106#
fb73857a
PP
107
108# context (keyed by second cxix arg provided by core)
fb73857a
PP
109my %entries;
110
111sub glob {
b75c8c73 112 my($pat,$cxix) = @_;
163d180b 113 my @pat;
fb73857a
PP
114
115 # glob without args defaults to $_
116 $pat = $_ unless defined $pat;
117
f9417615
FC
118 # assume global context if not provided one
119 $cxix = '_G_' unless defined $cxix;
120
121 # if we're just beginning, do it all first
122 if (!$entries{$cxix}) {
123 # extract patterns
124 if ($pat =~ /\s/) {
163d180b
GS
125 require Text::ParseWords;
126 @pat = Text::ParseWords::parse_line('\s+',0,$pat);
f9417615
FC
127 }
128 else {
163d180b 129 push @pat, $pat;
f9417615 130 }
163d180b 131
f9417615
FC
132 # Mike Mestnik: made to do abc{1,2,3} == abc1 abc2 abc3.
133 # abc3 will be the original {3} (and drop the {}).
134 # abc1 abc2 will be put in @appendpat.
135 # This was just the esiest way, not nearly the best.
136 REHASH: {
37248846
MM
137 my @appendpat = ();
138 for (@pat) {
139 # There must be a "," I.E. abc{efg} is not what we want.
140 while ( /^(.*)(?<!\\)\{(.*?)(?<!\\)\,.*?(?<!\\)\}(.*)$/ ) {
141 my ($start, $match, $end) = ($1, $2, $3);
142 #print "Got: \n\t$start\n\t$match\n\t$end\n";
143 my $tmp = "$start$match$end";
144 while ( $tmp =~ s/^(.*?)(?<!\\)\{(?:.*(?<!\\)\,)?(.*\Q$match\E.*?)(?:(?<!\\)\,.*)?(?<!\\)\}(.*)$/$1$2$3/ ) {
145 #print "Striped: $tmp\n";
81de5c05 146 # these expansions will be performed by the original,
37248846
MM
147 # when we call REHASH.
148 }
149 push @appendpat, ("$tmp");
150 s/^\Q$start\E(?<!\\)\{\Q$match\E(?<!\\)\,/$start\{/;
151 if ( /^\Q$start\E(?<!\\)\{(?!.*?(?<!\\)\,.*?\Q$end\E$)(.*)(?<!\\)\}\Q$end\E$/ ) {
152 $match = $1;
153 #print "GOT: \n\t$start\n\t$match\n\t$end\n\n";
154 $_ = "$start$match$end";
155 }
156 }
157 #print "Sould have "GOT" vs "Got"!\n";
158 #FIXME: There should be checking for this.
159 # How or what should be done about failure is beond me.
160 }
161 if ( $#appendpat != -1
162 ) {
163 #print "LOOP\n";
164 #FIXME: Max loop, no way! :")
165 for ( @appendpat ) {
166 push @pat, $_;
167 }
168 goto REHASH;
169 }
f9417615
FC
170 }
171 for ( @pat ) {
37248846
MM
172 s/\\{/{/g;
173 s/\\}/}/g;
174 s/\\,/,/g;
f9417615
FC
175 }
176 #print join ("\n", @pat). "\n";
37248846 177
f9417615
FC
178 $entries{$cxix} = [doglob(1,@pat)];
179 }
fb73857a
PP
180
181 # chuck it all out, quick or slow
182 if (wantarray) {
fb73857a
PP
183 return @{delete $entries{$cxix}};
184 }
185 else {
3a29130a 186 if (scalar @{$entries{$cxix}}) {
fb73857a
PP
187 return shift @{$entries{$cxix}};
188 }
189 else {
190 # return undef for EOL
fb73857a
PP
191 delete $entries{$cxix};
192 return undef;
193 }
194 }
195}
08aa1457 196
b75c8c73
MS
197{
198 no strict 'refs';
199
200 sub import {
08aa1457 201 my $pkg = shift;
95d94a4f 202 return unless @_;
08aa1457 203 my $sym = shift;
4dd406c2 204 my $callpkg = ($sym =~ s/^GLOBAL_//s ? 'CORE::GLOBAL' : caller(0));
95d94a4f 205 *{$callpkg.'::'.$sym} = \&{$pkg.'::'.$sym} if $sym eq 'glob';
b75c8c73 206 }
08aa1457 207}
08aa1457
PP
2081;
209
210__END__
211
212=head1 NAME
213
214File::DosGlob - DOS like globbing and then some
215
08aa1457
PP
216=head1 SYNOPSIS
217
218 require 5.004;
3cb6de81 219
fb73857a
PP
220 # override CORE::glob in current package
221 use File::DosGlob 'glob';
3cb6de81 222
95d94a4f
GS
223 # override CORE::glob in ALL packages (use with extreme caution!)
224 use File::DosGlob 'GLOBAL_glob';
225
08aa1457
PP
226 @perlfiles = glob "..\\pe?l/*.p?";
227 print <..\\pe?l/*.p?>;
3cb6de81 228
fb73857a 229 # from the command line (overrides only in main::)
08aa1457 230 > perl -MFile::DosGlob=glob -e "print <../pe*/*p?>"
08aa1457
PP
231
232=head1 DESCRIPTION
233
234A module that implements DOS-like globbing with a few enhancements.
dfb634a9 235It is largely compatible with perlglob.exe (the M$ setargv.obj
08aa1457
PP
236version) in all but one respect--it understands wildcards in
237directory components.
238
239For example, C<<..\\l*b\\file/*glob.p?>> will work as expected (in
240that it will find something like '..\lib\File/DosGlob.pm' alright).
241Note that all path components are case-insensitive, and that
242backslashes and forward slashes are both accepted, and preserved.
243You may have to double the backslashes if you are putting them in
244literally, due to double-quotish parsing of the pattern by perl.
245
163d180b
GS
246Spaces in the argument delimit distinct patterns, so
247C<glob('*.exe *.dll')> globs all filenames that end in C<.exe>
248or C<.dll>. If you want to put in literal spaces in the glob
249pattern, you can escape them with either double quotes, or backslashes.
250e.g. C<glob('c:/"Program Files"/*/*.dll')>, or
251C<glob('c:/Program\ Files/*/*.dll')>. The argument is tokenized using
252C<Text::ParseWords::parse_line()>, so see L<Text::ParseWords> for details
253of the quoting rules used.
254
08aa1457
PP
255Extending it to csh patterns is left as an exercise to the reader.
256
257=head1 EXPORTS (by request only)
258
259glob()
260
261=head1 BUGS
262
263Should probably be built into the core, and needs to stop
264pandering to DOS habits. Needs a dose of optimizium too.
265
266=head1 AUTHOR
267
6e238990 268Gurusamy Sarathy <gsar@activestate.com>
08aa1457
PP
269
270=head1 HISTORY
271
272=over 4
273
274=item *
275
95d94a4f
GS
276Support for globally overriding glob() (GSAR 3-JUN-98)
277
278=item *
279
fb73857a
PP
280Scalar context, independent iterator context fixes (GSAR 15-SEP-97)
281
282=item *
283
08aa1457
PP
284A few dir-vs-file optimizations result in glob importation being
28510 times faster than using perlglob.exe, and using perlglob.bat is
286only twice as slow as perlglob.exe (GSAR 28-MAY-97)
287
288=item *
289
63d661c5
FC
290Several cleanups prompted by lack of compatible perlglob.exe
291under Borland (GSAR 27-MAY-97)
292
293=item *
294
08aa1457
PP
295Initial version (GSAR 20-FEB-97)
296
297=back
298
299=head1 SEE ALSO
300
301perl
302
dfb634a9
GS
303perlglob.bat
304
163d180b
GS
305Text::ParseWords
306
08aa1457
PP
307=cut
308