This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[perl #71712] fixes for File::DosGlob
[perl5.git] / lib / File / DosGlob.pm
CommitLineData
08aa1457
PP
1#!perl -w
2
37248846
MM
3# use strict fails
4#Can't use string ("main::glob") as a symbol ref while "strict refs" in use at /usr/lib/perl5/5.005/File/DosGlob.pm line 191.
5
08aa1457
PP
6#
7# Documentation at the __END__
8#
9
10package File::DosGlob;
11
2f3c8ce9 12our $VERSION = '1.03';
b75c8c73 13use strict;
b395063c 14use warnings;
b75c8c73 15
08aa1457
PP
16sub doglob {
17 my $cond = shift;
18 my @retval = ();
2f3c8ce9 19 my $fix_drive_relative_paths;
08aa1457
PP
20 #print "doglob: ", join('|', @_), "\n";
21 OUTER:
b75c8c73 22 for my $pat (@_) {
08aa1457
PP
23 my @matched = ();
24 my @globdirs = ();
25 my $head = '.';
26 my $sepchr = '/';
b75c8c73
MS
27 my $tail;
28 next OUTER unless defined $pat and $pat ne '';
08aa1457 29 # if arg is within quotes strip em and do no globbing
b75c8c73
MS
30 if ($pat =~ /^"(.*)"\z/s) {
31 $pat = $1;
32 if ($cond eq 'd') { push(@retval, $pat) if -d $pat }
33 else { push(@retval, $pat) if -e $pat }
08aa1457
PP
34 next OUTER;
35 }
4dd406c2
GS
36 # wildcards with a drive prefix such as h:*.pm must be changed
37 # to h:./*.pm to expand correctly
b75c8c73 38 if ($pat =~ m|^([A-Za-z]:)[^/\\]|s) {
a371bcf3 39 substr($pat,0,2) = $1 . "./";
2f3c8ce9 40 $fix_drive_relative_paths = 1;
4dd406c2 41 }
b75c8c73 42 if ($pat =~ m|^(.*)([\\/])([^\\/]*)\z|s) {
08aa1457
PP
43 ($head, $sepchr, $tail) = ($1,$2,$3);
44 #print "div: |$head|$sepchr|$tail|\n";
b75c8c73 45 push (@retval, $pat), next OUTER if $tail eq '';
08aa1457
PP
46 if ($head =~ /[*?]/) {
47 @globdirs = doglob('d', $head);
48 push(@retval, doglob($cond, map {"$_$sepchr$tail"} @globdirs)),
49 next OUTER if @globdirs;
50 }
4dd406c2 51 $head .= $sepchr if $head eq '' or $head =~ /^[A-Za-z]:\z/s;
b75c8c73 52 $pat = $tail;
08aa1457
PP
53 }
54 #
55 # If file component has no wildcards, we can avoid opendir
b75c8c73 56 unless ($pat =~ /[*?]/) {
08aa1457
PP
57 $head = '' if $head eq '.';
58 $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
b75c8c73 59 $head .= $pat;
08aa1457
PP
60 if ($cond eq 'd') { push(@retval,$head) if -d $head }
61 else { push(@retval,$head) if -e $head }
62 next OUTER;
63 }
64 opendir(D, $head) or next OUTER;
65 my @leaves = readdir D;
66 closedir D;
67 $head = '' if $head eq '.';
68 $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
69
70 # escape regex metachars but not glob chars
2f3c8ce9 71 $pat =~ s:([].+^\-\${}()[|]):\\$1:g;
08aa1457 72 # and convert DOS-style wildcards to regex
b75c8c73
MS
73 $pat =~ s/\*/.*/g;
74 $pat =~ s/\?/.?/g;
08aa1457 75
b75c8c73
MS
76 #print "regex: '$pat', head: '$head'\n";
77 my $matchsub = sub { $_[0] =~ m|^$pat\z|is };
08aa1457
PP
78 INNER:
79 for my $e (@leaves) {
80 next INNER if $e eq '.' or $e eq '..';
81 next INNER if $cond eq 'd' and ! -d "$head$e";
82 push(@matched, "$head$e"), next INNER if &$matchsub($e);
83 #
84 # [DOS compatibility special case]
85 # Failed, add a trailing dot and try again, but only
86 # if name does not have a dot in it *and* pattern
87 # has a dot *and* name is shorter than 9 chars.
88 #
89 if (index($e,'.') == -1 and length($e) < 9
b75c8c73 90 and index($pat,'\\.') != -1) {
08aa1457
PP
91 push(@matched, "$head$e"), next INNER if &$matchsub("$e.");
92 }
93 }
94 push @retval, @matched if @matched;
95 }
2f3c8ce9
AD
96 if ($fix_drive_relative_paths) {
97 s|^([A-Za-z]:)\./|$1| for @retval;
98 }
08aa1457
PP
99 return @retval;
100}
101
be708cc0
JH
102
103#
104# Do DOS-like globbing on Mac OS
105#
106sub doglob_Mac {
107 my $cond = shift;
108 my @retval = ();
109
110 #print "doglob_Mac: ", join('|', @_), "\n";
111 OUTER:
112 for my $arg (@_) {
113 local $_ = $arg;
114 my @matched = ();
115 my @globdirs = ();
116 my $head = ':';
117 my $not_esc_head = $head;
118 my $sepchr = ':';
119 next OUTER unless defined $_ and $_ ne '';
120 # if arg is within quotes strip em and do no globbing
121 if (/^"(.*)"\z/s) {
122 $_ = $1;
123 # $_ may contain escaped metachars '\*', '\?' and '\'
124 my $not_esc_arg = $_;
125 $not_esc_arg =~ s/\\([*?\\])/$1/g;
126 if ($cond eq 'd') { push(@retval, $not_esc_arg) if -d $not_esc_arg }
127 else { push(@retval, $not_esc_arg) if -e $not_esc_arg }
128 next OUTER;
129 }
130
131 if (m|^(.*?)(:+)([^:]*)\z|s) { # note: $1 is not greedy
132 my $tail;
133 ($head, $sepchr, $tail) = ($1,$2,$3);
134 #print "div: |$head|$sepchr|$tail|\n";
135 push (@retval, $_), next OUTER if $tail eq '';
136 #
137 # $head may contain escaped metachars '\*' and '\?'
138
139 my $tmp_head = $head;
140 # if a '*' or '?' is preceded by an odd count of '\', temporary delete
141 # it (and its preceding backslashes), i.e. don't treat '\*' and '\?' as
142 # wildcards
143 $tmp_head =~ s/(\\*)([*?])/$2 x ((length($1) + 1) % 2)/eg;
144
145 if ($tmp_head =~ /[*?]/) { # if there are wildcards ...
146 @globdirs = doglob_Mac('d', $head);
147 push(@retval, doglob_Mac($cond, map {"$_$sepchr$tail"} @globdirs)),
148 next OUTER if @globdirs;
149 }
150
151 $head .= $sepchr;
152 $not_esc_head = $head;
153 # unescape $head for file operations
154 $not_esc_head =~ s/\\([*?\\])/$1/g;
155 $_ = $tail;
156 }
157 #
158 # If file component has no wildcards, we can avoid opendir
159
160 my $tmp_tail = $_;
161 # if a '*' or '?' is preceded by an odd count of '\', temporary delete
162 # it (and its preceding backslashes), i.e. don't treat '\*' and '\?' as
163 # wildcards
164 $tmp_tail =~ s/(\\*)([*?])/$2 x ((length($1) + 1) % 2)/eg;
165
166 unless ($tmp_tail =~ /[*?]/) { # if there are wildcards ...
167 $not_esc_head = $head = '' if $head eq ':';
168 my $not_esc_tail = $_;
169 # unescape $head and $tail for file operations
170 $not_esc_tail =~ s/\\([*?\\])/$1/g;
171 $head .= $_;
172 $not_esc_head .= $not_esc_tail;
173 if ($cond eq 'd') { push(@retval,$head) if -d $not_esc_head }
174 else { push(@retval,$head) if -e $not_esc_head }
175 next OUTER;
176 }
177 #print "opendir($not_esc_head)\n";
178 opendir(D, $not_esc_head) or next OUTER;
179 my @leaves = readdir D;
180 closedir D;
181
182 # escape regex metachars but not '\' and glob chars '*', '?'
183 $_ =~ s:([].+^\-\${}[|]):\\$1:g;
184 # and convert DOS-style wildcards to regex,
185 # but only if they are not escaped
186 $_ =~ s/(\\*)([*?])/$1 . ('.' x ((length($1) + 1) % 2)) . $2/eg;
187
188 #print "regex: '$_', head: '$head', unescaped head: '$not_esc_head'\n";
189 my $matchsub = eval 'sub { $_[0] =~ m|^' . $_ . '\\z|ios }';
190 warn($@), next OUTER if $@;
191 INNER:
192 for my $e (@leaves) {
193 next INNER if $e eq '.' or $e eq '..';
194 next INNER if $cond eq 'd' and ! -d "$not_esc_head$e";
195
196 if (&$matchsub($e)) {
197 my $leave = (($not_esc_head eq ':') && (-f "$not_esc_head$e")) ?
198 "$e" : "$not_esc_head$e";
199 #
200 # On Mac OS, the two glob metachars '*' and '?' and the escape
201 # char '\' are valid characters for file and directory names.
202 # We have to escape and treat them specially.
203 $leave =~ s|([*?\\])|\\$1|g;
204 push(@matched, $leave);
205 next INNER;
206 }
207 }
208 push @retval, @matched if @matched;
209 }
210 return @retval;
211}
212
213#
214# _expand_volume() will only be used on Mac OS (Classic):
215# Takes an array of original patterns as argument and returns an array of
216# possibly modified patterns. Each original pattern is processed like
217# that:
218# + If there's a volume name in the pattern, we push a separate pattern
219# for each mounted volume that matches (with '*', '?' and '\' escaped).
220# + If there's no volume name in the original pattern, it is pushed
221# unchanged.
222# Note that the returned array of patterns may be empty.
223#
224sub _expand_volume {
225
226 require MacPerl; # to be verbose
227
228 my @pat = @_;
229 my @new_pat = ();
230 my @FSSpec_Vols = MacPerl::Volumes();
231 my @mounted_volumes = ();
232
233 foreach my $spec_vol (@FSSpec_Vols) {
234 # push all mounted volumes into array
235 push @mounted_volumes, MacPerl::MakePath($spec_vol);
236 }
237 #print "mounted volumes: |@mounted_volumes|\n";
238
239 while (@pat) {
240 my $pat = shift @pat;
241 if ($pat =~ /^([^:]+:)(.*)\z/) { # match a volume name?
242 my $vol_pat = $1;
243 my $tail = $2;
244 #
245 # escape regex metachars but not '\' and glob chars '*', '?'
246 $vol_pat =~ s:([].+^\-\${}[|]):\\$1:g;
247 # and convert DOS-style wildcards to regex,
248 # but only if they are not escaped
249 $vol_pat =~ s/(\\*)([*?])/$1 . ('.' x ((length($1) + 1) % 2)) . $2/eg;
250 #print "volume regex: '$vol_pat' \n";
251
252 foreach my $volume (@mounted_volumes) {
253 if ($volume =~ m|^$vol_pat\z|ios) {
254 #
255 # On Mac OS, the two glob metachars '*' and '?' and the
256 # escape char '\' are valid characters for volume names.
257 # We have to escape and treat them specially.
258 $volume =~ s|([*?\\])|\\$1|g;
259 push @new_pat, $volume . $tail;
260 }
261 }
262 } else { # no volume name in pattern, push original pattern
263 push @new_pat, $pat;
264 }
265 }
266 return @new_pat;
267}
268
08aa1457 269#
fb73857a
PP
270# this can be used to override CORE::glob in a specific
271# package by saying C<use File::DosGlob 'glob';> in that
272# namespace.
08aa1457 273#
fb73857a
PP
274
275# context (keyed by second cxix arg provided by core)
276my %iter;
277my %entries;
278
279sub glob {
b75c8c73 280 my($pat,$cxix) = @_;
163d180b 281 my @pat;
fb73857a
PP
282
283 # glob without args defaults to $_
284 $pat = $_ unless defined $pat;
285
163d180b
GS
286 # extract patterns
287 if ($pat =~ /\s/) {
288 require Text::ParseWords;
289 @pat = Text::ParseWords::parse_line('\s+',0,$pat);
290 }
291 else {
292 push @pat, $pat;
293 }
294
37248846
MM
295 # Mike Mestnik: made to do abc{1,2,3} == abc1 abc2 abc3.
296 # abc3 will be the original {3} (and drop the {}).
297 # abc1 abc2 will be put in @appendpat.
298 # This was just the esiest way, not nearly the best.
299 REHASH: {
300 my @appendpat = ();
301 for (@pat) {
302 # There must be a "," I.E. abc{efg} is not what we want.
303 while ( /^(.*)(?<!\\)\{(.*?)(?<!\\)\,.*?(?<!\\)\}(.*)$/ ) {
304 my ($start, $match, $end) = ($1, $2, $3);
305 #print "Got: \n\t$start\n\t$match\n\t$end\n";
306 my $tmp = "$start$match$end";
307 while ( $tmp =~ s/^(.*?)(?<!\\)\{(?:.*(?<!\\)\,)?(.*\Q$match\E.*?)(?:(?<!\\)\,.*)?(?<!\\)\}(.*)$/$1$2$3/ ) {
308 #print "Striped: $tmp\n";
309 # these expanshions will be preformed by the original,
310 # when we call REHASH.
311 }
312 push @appendpat, ("$tmp");
313 s/^\Q$start\E(?<!\\)\{\Q$match\E(?<!\\)\,/$start\{/;
314 if ( /^\Q$start\E(?<!\\)\{(?!.*?(?<!\\)\,.*?\Q$end\E$)(.*)(?<!\\)\}\Q$end\E$/ ) {
315 $match = $1;
316 #print "GOT: \n\t$start\n\t$match\n\t$end\n\n";
317 $_ = "$start$match$end";
318 }
319 }
320 #print "Sould have "GOT" vs "Got"!\n";
321 #FIXME: There should be checking for this.
322 # How or what should be done about failure is beond me.
323 }
324 if ( $#appendpat != -1
325 ) {
326 #print "LOOP\n";
327 #FIXME: Max loop, no way! :")
328 for ( @appendpat ) {
329 push @pat, $_;
330 }
331 goto REHASH;
332 }
333 }
334 for ( @pat ) {
335 s/\\{/{/g;
336 s/\\}/}/g;
337 s/\\,/,/g;
338 }
339 #print join ("\n", @pat). "\n";
340
fb73857a
PP
341 # assume global context if not provided one
342 $cxix = '_G_' unless defined $cxix;
343 $iter{$cxix} = 0 unless exists $iter{$cxix};
344
345 # if we're just beginning, do it all first
346 if ($iter{$cxix} == 0) {
862f843b 347 $entries{$cxix} = [doglob(1,@pat)];
be708cc0 348 }
fb73857a
PP
349
350 # chuck it all out, quick or slow
351 if (wantarray) {
352 delete $iter{$cxix};
353 return @{delete $entries{$cxix}};
354 }
355 else {
356 if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
357 return shift @{$entries{$cxix}};
358 }
359 else {
360 # return undef for EOL
361 delete $iter{$cxix};
362 delete $entries{$cxix};
363 return undef;
364 }
365 }
366}
08aa1457 367
b75c8c73
MS
368{
369 no strict 'refs';
370
371 sub import {
08aa1457 372 my $pkg = shift;
95d94a4f 373 return unless @_;
08aa1457 374 my $sym = shift;
4dd406c2 375 my $callpkg = ($sym =~ s/^GLOBAL_//s ? 'CORE::GLOBAL' : caller(0));
95d94a4f 376 *{$callpkg.'::'.$sym} = \&{$pkg.'::'.$sym} if $sym eq 'glob';
b75c8c73 377 }
08aa1457 378}
08aa1457
PP
3791;
380
381__END__
382
383=head1 NAME
384
385File::DosGlob - DOS like globbing and then some
386
08aa1457
PP
387=head1 SYNOPSIS
388
389 require 5.004;
3cb6de81 390
fb73857a
PP
391 # override CORE::glob in current package
392 use File::DosGlob 'glob';
3cb6de81 393
95d94a4f
GS
394 # override CORE::glob in ALL packages (use with extreme caution!)
395 use File::DosGlob 'GLOBAL_glob';
396
08aa1457
PP
397 @perlfiles = glob "..\\pe?l/*.p?";
398 print <..\\pe?l/*.p?>;
3cb6de81 399
fb73857a 400 # from the command line (overrides only in main::)
08aa1457 401 > perl -MFile::DosGlob=glob -e "print <../pe*/*p?>"
08aa1457
PP
402
403=head1 DESCRIPTION
404
405A module that implements DOS-like globbing with a few enhancements.
dfb634a9 406It is largely compatible with perlglob.exe (the M$ setargv.obj
08aa1457
PP
407version) in all but one respect--it understands wildcards in
408directory components.
409
410For example, C<<..\\l*b\\file/*glob.p?>> will work as expected (in
411that it will find something like '..\lib\File/DosGlob.pm' alright).
412Note that all path components are case-insensitive, and that
413backslashes and forward slashes are both accepted, and preserved.
414You may have to double the backslashes if you are putting them in
415literally, due to double-quotish parsing of the pattern by perl.
416
163d180b
GS
417Spaces in the argument delimit distinct patterns, so
418C<glob('*.exe *.dll')> globs all filenames that end in C<.exe>
419or C<.dll>. If you want to put in literal spaces in the glob
420pattern, you can escape them with either double quotes, or backslashes.
421e.g. C<glob('c:/"Program Files"/*/*.dll')>, or
422C<glob('c:/Program\ Files/*/*.dll')>. The argument is tokenized using
423C<Text::ParseWords::parse_line()>, so see L<Text::ParseWords> for details
424of the quoting rules used.
425
08aa1457
PP
426Extending it to csh patterns is left as an exercise to the reader.
427
be708cc0
JH
428=head1 NOTES
429
430=over 4
431
432=item *
433
434Mac OS (Classic) users should note a few differences. The specification
435of pathnames in glob patterns adheres to the usual Mac OS conventions:
436The path separator is a colon ':', not a slash '/' or backslash '\'. A
437full path always begins with a volume name. A relative pathname on Mac
438OS must always begin with a ':', except when specifying a file or
439directory name in the current working directory, where the leading colon
440is optional. If specifying a volume name only, a trailing ':' is
441required. Due to these rules, a glob like E<lt>*:E<gt> will find all
442mounted volumes, while a glob like E<lt>*E<gt> or E<lt>:*E<gt> will find
443all files and directories in the current directory.
444
445Note that updirs in the glob pattern are resolved before the matching begins,
446i.e. a pattern like "*HD:t?p::a*" will be matched as "*HD:a*". Note also,
447that a single trailing ':' in the pattern is ignored (unless it's a volume
448name pattern like "*HD:"), i.e. a glob like <:*:> will find both directories
449I<and> files (and not, as one might expect, only directories).
450
451The metachars '*', '?' and the escape char '\' are valid characters in
452volume, directory and file names on Mac OS. Hence, if you want to match
453a '*', '?' or '\' literally, you have to escape these characters. Due to
454perl's quoting rules, things may get a bit complicated, when you want to
455match a string like '\*' literally, or when you want to match '\' literally,
456but treat the immediately following character '*' as metachar. So, here's a
457rule of thumb (applies to both single- and double-quoted strings): escape
458each '*' or '?' or '\' with a backslash, if you want to treat them literally,
459and then double each backslash and your are done. E.g.
460
461- Match '\*' literally
462
463 escape both '\' and '*' : '\\\*'
464 double the backslashes : '\\\\\\*'
465
466(Internally, the glob routine sees a '\\\*', which means that both '\' and
467'*' are escaped.)
468
469
470- Match '\' literally, treat '*' as metachar
471
472 escape '\' but not '*' : '\\*'
473 double the backslashes : '\\\\*'
474
475(Internally, the glob routine sees a '\\*', which means that '\' is escaped and
476'*' is not.)
477
478Note that you also have to quote literal spaces in the glob pattern, as described
479above.
480
481=back
482
08aa1457
PP
483=head1 EXPORTS (by request only)
484
485glob()
486
487=head1 BUGS
488
489Should probably be built into the core, and needs to stop
490pandering to DOS habits. Needs a dose of optimizium too.
491
492=head1 AUTHOR
493
6e238990 494Gurusamy Sarathy <gsar@activestate.com>
08aa1457
PP
495
496=head1 HISTORY
497
498=over 4
499
500=item *
501
95d94a4f
GS
502Support for globally overriding glob() (GSAR 3-JUN-98)
503
504=item *
505
fb73857a
PP
506Scalar context, independent iterator context fixes (GSAR 15-SEP-97)
507
508=item *
509
08aa1457
PP
510A few dir-vs-file optimizations result in glob importation being
51110 times faster than using perlglob.exe, and using perlglob.bat is
512only twice as slow as perlglob.exe (GSAR 28-MAY-97)
513
514=item *
515
516Several cleanups prompted by lack of compatible perlglob.exe
517under Borland (GSAR 27-MAY-97)
518
519=item *
520
521Initial version (GSAR 20-FEB-97)
522
523=back
524
525=head1 SEE ALSO
526
527perl
528
dfb634a9
GS
529perlglob.bat
530
163d180b
GS
531Text::ParseWords
532
08aa1457
PP
533=cut
534