This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
* Fixed output for lex-like scanners example in perlop
[perl5.git] / lib / File / DosGlob.pm
CommitLineData
08aa1457 1#!perl -w
2
37248846
MM
3# use strict fails
4#Can't use string ("main::glob") as a symbol ref while "strict refs" in use at /usr/lib/perl5/5.005/File/DosGlob.pm line 191.
5
08aa1457 6#
7# Documentation at the __END__
8#
9
10package File::DosGlob;
11
862f843b 12our $VERSION = '1.02';
b75c8c73 13use strict;
b395063c 14use warnings;
b75c8c73 15
08aa1457 16sub doglob {
17 my $cond = shift;
18 my @retval = ();
19 #print "doglob: ", join('|', @_), "\n";
20 OUTER:
b75c8c73 21 for my $pat (@_) {
08aa1457 22 my @matched = ();
23 my @globdirs = ();
24 my $head = '.';
25 my $sepchr = '/';
b75c8c73
MS
26 my $tail;
27 next OUTER unless defined $pat and $pat ne '';
08aa1457 28 # if arg is within quotes strip em and do no globbing
b75c8c73
MS
29 if ($pat =~ /^"(.*)"\z/s) {
30 $pat = $1;
31 if ($cond eq 'd') { push(@retval, $pat) if -d $pat }
32 else { push(@retval, $pat) if -e $pat }
08aa1457 33 next OUTER;
34 }
4dd406c2
GS
35 # wildcards with a drive prefix such as h:*.pm must be changed
36 # to h:./*.pm to expand correctly
b75c8c73 37 if ($pat =~ m|^([A-Za-z]:)[^/\\]|s) {
a371bcf3 38 substr($pat,0,2) = $1 . "./";
4dd406c2 39 }
b75c8c73 40 if ($pat =~ m|^(.*)([\\/])([^\\/]*)\z|s) {
08aa1457 41 ($head, $sepchr, $tail) = ($1,$2,$3);
42 #print "div: |$head|$sepchr|$tail|\n";
b75c8c73 43 push (@retval, $pat), next OUTER if $tail eq '';
08aa1457 44 if ($head =~ /[*?]/) {
45 @globdirs = doglob('d', $head);
46 push(@retval, doglob($cond, map {"$_$sepchr$tail"} @globdirs)),
47 next OUTER if @globdirs;
48 }
4dd406c2 49 $head .= $sepchr if $head eq '' or $head =~ /^[A-Za-z]:\z/s;
b75c8c73 50 $pat = $tail;
08aa1457 51 }
52 #
53 # If file component has no wildcards, we can avoid opendir
b75c8c73 54 unless ($pat =~ /[*?]/) {
08aa1457 55 $head = '' if $head eq '.';
56 $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
b75c8c73 57 $head .= $pat;
08aa1457 58 if ($cond eq 'd') { push(@retval,$head) if -d $head }
59 else { push(@retval,$head) if -e $head }
60 next OUTER;
61 }
62 opendir(D, $head) or next OUTER;
63 my @leaves = readdir D;
64 closedir D;
65 $head = '' if $head eq '.';
66 $head .= $sepchr unless $head eq '' or substr($head,-1) eq $sepchr;
67
68 # escape regex metachars but not glob chars
b75c8c73 69 $pat =~ s:([].+^\-\${}[|]):\\$1:g;
08aa1457 70 # and convert DOS-style wildcards to regex
b75c8c73
MS
71 $pat =~ s/\*/.*/g;
72 $pat =~ s/\?/.?/g;
08aa1457 73
b75c8c73
MS
74 #print "regex: '$pat', head: '$head'\n";
75 my $matchsub = sub { $_[0] =~ m|^$pat\z|is };
08aa1457 76 INNER:
77 for my $e (@leaves) {
78 next INNER if $e eq '.' or $e eq '..';
79 next INNER if $cond eq 'd' and ! -d "$head$e";
80 push(@matched, "$head$e"), next INNER if &$matchsub($e);
81 #
82 # [DOS compatibility special case]
83 # Failed, add a trailing dot and try again, but only
84 # if name does not have a dot in it *and* pattern
85 # has a dot *and* name is shorter than 9 chars.
86 #
87 if (index($e,'.') == -1 and length($e) < 9
b75c8c73 88 and index($pat,'\\.') != -1) {
08aa1457 89 push(@matched, "$head$e"), next INNER if &$matchsub("$e.");
90 }
91 }
92 push @retval, @matched if @matched;
93 }
94 return @retval;
95}
96
be708cc0
JH
97
98#
99# Do DOS-like globbing on Mac OS
100#
101sub doglob_Mac {
102 my $cond = shift;
103 my @retval = ();
104
105 #print "doglob_Mac: ", join('|', @_), "\n";
106 OUTER:
107 for my $arg (@_) {
108 local $_ = $arg;
109 my @matched = ();
110 my @globdirs = ();
111 my $head = ':';
112 my $not_esc_head = $head;
113 my $sepchr = ':';
114 next OUTER unless defined $_ and $_ ne '';
115 # if arg is within quotes strip em and do no globbing
116 if (/^"(.*)"\z/s) {
117 $_ = $1;
118 # $_ may contain escaped metachars '\*', '\?' and '\'
119 my $not_esc_arg = $_;
120 $not_esc_arg =~ s/\\([*?\\])/$1/g;
121 if ($cond eq 'd') { push(@retval, $not_esc_arg) if -d $not_esc_arg }
122 else { push(@retval, $not_esc_arg) if -e $not_esc_arg }
123 next OUTER;
124 }
125
126 if (m|^(.*?)(:+)([^:]*)\z|s) { # note: $1 is not greedy
127 my $tail;
128 ($head, $sepchr, $tail) = ($1,$2,$3);
129 #print "div: |$head|$sepchr|$tail|\n";
130 push (@retval, $_), next OUTER if $tail eq '';
131 #
132 # $head may contain escaped metachars '\*' and '\?'
133
134 my $tmp_head = $head;
135 # if a '*' or '?' is preceded by an odd count of '\', temporary delete
136 # it (and its preceding backslashes), i.e. don't treat '\*' and '\?' as
137 # wildcards
138 $tmp_head =~ s/(\\*)([*?])/$2 x ((length($1) + 1) % 2)/eg;
139
140 if ($tmp_head =~ /[*?]/) { # if there are wildcards ...
141 @globdirs = doglob_Mac('d', $head);
142 push(@retval, doglob_Mac($cond, map {"$_$sepchr$tail"} @globdirs)),
143 next OUTER if @globdirs;
144 }
145
146 $head .= $sepchr;
147 $not_esc_head = $head;
148 # unescape $head for file operations
149 $not_esc_head =~ s/\\([*?\\])/$1/g;
150 $_ = $tail;
151 }
152 #
153 # If file component has no wildcards, we can avoid opendir
154
155 my $tmp_tail = $_;
156 # if a '*' or '?' is preceded by an odd count of '\', temporary delete
157 # it (and its preceding backslashes), i.e. don't treat '\*' and '\?' as
158 # wildcards
159 $tmp_tail =~ s/(\\*)([*?])/$2 x ((length($1) + 1) % 2)/eg;
160
161 unless ($tmp_tail =~ /[*?]/) { # if there are wildcards ...
162 $not_esc_head = $head = '' if $head eq ':';
163 my $not_esc_tail = $_;
164 # unescape $head and $tail for file operations
165 $not_esc_tail =~ s/\\([*?\\])/$1/g;
166 $head .= $_;
167 $not_esc_head .= $not_esc_tail;
168 if ($cond eq 'd') { push(@retval,$head) if -d $not_esc_head }
169 else { push(@retval,$head) if -e $not_esc_head }
170 next OUTER;
171 }
172 #print "opendir($not_esc_head)\n";
173 opendir(D, $not_esc_head) or next OUTER;
174 my @leaves = readdir D;
175 closedir D;
176
177 # escape regex metachars but not '\' and glob chars '*', '?'
178 $_ =~ s:([].+^\-\${}[|]):\\$1:g;
179 # and convert DOS-style wildcards to regex,
180 # but only if they are not escaped
181 $_ =~ s/(\\*)([*?])/$1 . ('.' x ((length($1) + 1) % 2)) . $2/eg;
182
183 #print "regex: '$_', head: '$head', unescaped head: '$not_esc_head'\n";
184 my $matchsub = eval 'sub { $_[0] =~ m|^' . $_ . '\\z|ios }';
185 warn($@), next OUTER if $@;
186 INNER:
187 for my $e (@leaves) {
188 next INNER if $e eq '.' or $e eq '..';
189 next INNER if $cond eq 'd' and ! -d "$not_esc_head$e";
190
191 if (&$matchsub($e)) {
192 my $leave = (($not_esc_head eq ':') && (-f "$not_esc_head$e")) ?
193 "$e" : "$not_esc_head$e";
194 #
195 # On Mac OS, the two glob metachars '*' and '?' and the escape
196 # char '\' are valid characters for file and directory names.
197 # We have to escape and treat them specially.
198 $leave =~ s|([*?\\])|\\$1|g;
199 push(@matched, $leave);
200 next INNER;
201 }
202 }
203 push @retval, @matched if @matched;
204 }
205 return @retval;
206}
207
208#
209# _expand_volume() will only be used on Mac OS (Classic):
210# Takes an array of original patterns as argument and returns an array of
211# possibly modified patterns. Each original pattern is processed like
212# that:
213# + If there's a volume name in the pattern, we push a separate pattern
214# for each mounted volume that matches (with '*', '?' and '\' escaped).
215# + If there's no volume name in the original pattern, it is pushed
216# unchanged.
217# Note that the returned array of patterns may be empty.
218#
219sub _expand_volume {
220
221 require MacPerl; # to be verbose
222
223 my @pat = @_;
224 my @new_pat = ();
225 my @FSSpec_Vols = MacPerl::Volumes();
226 my @mounted_volumes = ();
227
228 foreach my $spec_vol (@FSSpec_Vols) {
229 # push all mounted volumes into array
230 push @mounted_volumes, MacPerl::MakePath($spec_vol);
231 }
232 #print "mounted volumes: |@mounted_volumes|\n";
233
234 while (@pat) {
235 my $pat = shift @pat;
236 if ($pat =~ /^([^:]+:)(.*)\z/) { # match a volume name?
237 my $vol_pat = $1;
238 my $tail = $2;
239 #
240 # escape regex metachars but not '\' and glob chars '*', '?'
241 $vol_pat =~ s:([].+^\-\${}[|]):\\$1:g;
242 # and convert DOS-style wildcards to regex,
243 # but only if they are not escaped
244 $vol_pat =~ s/(\\*)([*?])/$1 . ('.' x ((length($1) + 1) % 2)) . $2/eg;
245 #print "volume regex: '$vol_pat' \n";
246
247 foreach my $volume (@mounted_volumes) {
248 if ($volume =~ m|^$vol_pat\z|ios) {
249 #
250 # On Mac OS, the two glob metachars '*' and '?' and the
251 # escape char '\' are valid characters for volume names.
252 # We have to escape and treat them specially.
253 $volume =~ s|([*?\\])|\\$1|g;
254 push @new_pat, $volume . $tail;
255 }
256 }
257 } else { # no volume name in pattern, push original pattern
258 push @new_pat, $pat;
259 }
260 }
261 return @new_pat;
262}
263
08aa1457 264#
fb73857a 265# this can be used to override CORE::glob in a specific
266# package by saying C<use File::DosGlob 'glob';> in that
267# namespace.
08aa1457 268#
fb73857a 269
270# context (keyed by second cxix arg provided by core)
271my %iter;
272my %entries;
273
274sub glob {
b75c8c73 275 my($pat,$cxix) = @_;
163d180b 276 my @pat;
fb73857a 277
278 # glob without args defaults to $_
279 $pat = $_ unless defined $pat;
280
163d180b
GS
281 # extract patterns
282 if ($pat =~ /\s/) {
283 require Text::ParseWords;
284 @pat = Text::ParseWords::parse_line('\s+',0,$pat);
285 }
286 else {
287 push @pat, $pat;
288 }
289
37248846
MM
290 # Mike Mestnik: made to do abc{1,2,3} == abc1 abc2 abc3.
291 # abc3 will be the original {3} (and drop the {}).
292 # abc1 abc2 will be put in @appendpat.
293 # This was just the esiest way, not nearly the best.
294 REHASH: {
295 my @appendpat = ();
296 for (@pat) {
297 # There must be a "," I.E. abc{efg} is not what we want.
298 while ( /^(.*)(?<!\\)\{(.*?)(?<!\\)\,.*?(?<!\\)\}(.*)$/ ) {
299 my ($start, $match, $end) = ($1, $2, $3);
300 #print "Got: \n\t$start\n\t$match\n\t$end\n";
301 my $tmp = "$start$match$end";
302 while ( $tmp =~ s/^(.*?)(?<!\\)\{(?:.*(?<!\\)\,)?(.*\Q$match\E.*?)(?:(?<!\\)\,.*)?(?<!\\)\}(.*)$/$1$2$3/ ) {
303 #print "Striped: $tmp\n";
304 # these expanshions will be preformed by the original,
305 # when we call REHASH.
306 }
307 push @appendpat, ("$tmp");
308 s/^\Q$start\E(?<!\\)\{\Q$match\E(?<!\\)\,/$start\{/;
309 if ( /^\Q$start\E(?<!\\)\{(?!.*?(?<!\\)\,.*?\Q$end\E$)(.*)(?<!\\)\}\Q$end\E$/ ) {
310 $match = $1;
311 #print "GOT: \n\t$start\n\t$match\n\t$end\n\n";
312 $_ = "$start$match$end";
313 }
314 }
315 #print "Sould have "GOT" vs "Got"!\n";
316 #FIXME: There should be checking for this.
317 # How or what should be done about failure is beond me.
318 }
319 if ( $#appendpat != -1
320 ) {
321 #print "LOOP\n";
322 #FIXME: Max loop, no way! :")
323 for ( @appendpat ) {
324 push @pat, $_;
325 }
326 goto REHASH;
327 }
328 }
329 for ( @pat ) {
330 s/\\{/{/g;
331 s/\\}/}/g;
332 s/\\,/,/g;
333 }
334 #print join ("\n", @pat). "\n";
335
fb73857a 336 # assume global context if not provided one
337 $cxix = '_G_' unless defined $cxix;
338 $iter{$cxix} = 0 unless exists $iter{$cxix};
339
340 # if we're just beginning, do it all first
341 if ($iter{$cxix} == 0) {
862f843b 342 $entries{$cxix} = [doglob(1,@pat)];
be708cc0 343 }
fb73857a 344
345 # chuck it all out, quick or slow
346 if (wantarray) {
347 delete $iter{$cxix};
348 return @{delete $entries{$cxix}};
349 }
350 else {
351 if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
352 return shift @{$entries{$cxix}};
353 }
354 else {
355 # return undef for EOL
356 delete $iter{$cxix};
357 delete $entries{$cxix};
358 return undef;
359 }
360 }
361}
08aa1457 362
b75c8c73
MS
363{
364 no strict 'refs';
365
366 sub import {
08aa1457 367 my $pkg = shift;
95d94a4f 368 return unless @_;
08aa1457 369 my $sym = shift;
4dd406c2 370 my $callpkg = ($sym =~ s/^GLOBAL_//s ? 'CORE::GLOBAL' : caller(0));
95d94a4f 371 *{$callpkg.'::'.$sym} = \&{$pkg.'::'.$sym} if $sym eq 'glob';
b75c8c73 372 }
08aa1457 373}
08aa1457 3741;
375
376__END__
377
378=head1 NAME
379
380File::DosGlob - DOS like globbing and then some
381
08aa1457 382=head1 SYNOPSIS
383
384 require 5.004;
3cb6de81 385
fb73857a 386 # override CORE::glob in current package
387 use File::DosGlob 'glob';
3cb6de81 388
95d94a4f
GS
389 # override CORE::glob in ALL packages (use with extreme caution!)
390 use File::DosGlob 'GLOBAL_glob';
391
08aa1457 392 @perlfiles = glob "..\\pe?l/*.p?";
393 print <..\\pe?l/*.p?>;
3cb6de81 394
fb73857a 395 # from the command line (overrides only in main::)
08aa1457 396 > perl -MFile::DosGlob=glob -e "print <../pe*/*p?>"
08aa1457 397
398=head1 DESCRIPTION
399
400A module that implements DOS-like globbing with a few enhancements.
dfb634a9 401It is largely compatible with perlglob.exe (the M$ setargv.obj
08aa1457 402version) in all but one respect--it understands wildcards in
403directory components.
404
405For example, C<<..\\l*b\\file/*glob.p?>> will work as expected (in
406that it will find something like '..\lib\File/DosGlob.pm' alright).
407Note that all path components are case-insensitive, and that
408backslashes and forward slashes are both accepted, and preserved.
409You may have to double the backslashes if you are putting them in
410literally, due to double-quotish parsing of the pattern by perl.
411
163d180b
GS
412Spaces in the argument delimit distinct patterns, so
413C<glob('*.exe *.dll')> globs all filenames that end in C<.exe>
414or C<.dll>. If you want to put in literal spaces in the glob
415pattern, you can escape them with either double quotes, or backslashes.
416e.g. C<glob('c:/"Program Files"/*/*.dll')>, or
417C<glob('c:/Program\ Files/*/*.dll')>. The argument is tokenized using
418C<Text::ParseWords::parse_line()>, so see L<Text::ParseWords> for details
419of the quoting rules used.
420
08aa1457 421Extending it to csh patterns is left as an exercise to the reader.
422
be708cc0
JH
423=head1 NOTES
424
425=over 4
426
427=item *
428
429Mac OS (Classic) users should note a few differences. The specification
430of pathnames in glob patterns adheres to the usual Mac OS conventions:
431The path separator is a colon ':', not a slash '/' or backslash '\'. A
432full path always begins with a volume name. A relative pathname on Mac
433OS must always begin with a ':', except when specifying a file or
434directory name in the current working directory, where the leading colon
435is optional. If specifying a volume name only, a trailing ':' is
436required. Due to these rules, a glob like E<lt>*:E<gt> will find all
437mounted volumes, while a glob like E<lt>*E<gt> or E<lt>:*E<gt> will find
438all files and directories in the current directory.
439
440Note that updirs in the glob pattern are resolved before the matching begins,
441i.e. a pattern like "*HD:t?p::a*" will be matched as "*HD:a*". Note also,
442that a single trailing ':' in the pattern is ignored (unless it's a volume
443name pattern like "*HD:"), i.e. a glob like <:*:> will find both directories
444I<and> files (and not, as one might expect, only directories).
445
446The metachars '*', '?' and the escape char '\' are valid characters in
447volume, directory and file names on Mac OS. Hence, if you want to match
448a '*', '?' or '\' literally, you have to escape these characters. Due to
449perl's quoting rules, things may get a bit complicated, when you want to
450match a string like '\*' literally, or when you want to match '\' literally,
451but treat the immediately following character '*' as metachar. So, here's a
452rule of thumb (applies to both single- and double-quoted strings): escape
453each '*' or '?' or '\' with a backslash, if you want to treat them literally,
454and then double each backslash and your are done. E.g.
455
456- Match '\*' literally
457
458 escape both '\' and '*' : '\\\*'
459 double the backslashes : '\\\\\\*'
460
461(Internally, the glob routine sees a '\\\*', which means that both '\' and
462'*' are escaped.)
463
464
465- Match '\' literally, treat '*' as metachar
466
467 escape '\' but not '*' : '\\*'
468 double the backslashes : '\\\\*'
469
470(Internally, the glob routine sees a '\\*', which means that '\' is escaped and
471'*' is not.)
472
473Note that you also have to quote literal spaces in the glob pattern, as described
474above.
475
476=back
477
08aa1457 478=head1 EXPORTS (by request only)
479
480glob()
481
482=head1 BUGS
483
484Should probably be built into the core, and needs to stop
485pandering to DOS habits. Needs a dose of optimizium too.
486
487=head1 AUTHOR
488
6e238990 489Gurusamy Sarathy <gsar@activestate.com>
08aa1457 490
491=head1 HISTORY
492
493=over 4
494
495=item *
496
95d94a4f
GS
497Support for globally overriding glob() (GSAR 3-JUN-98)
498
499=item *
500
fb73857a 501Scalar context, independent iterator context fixes (GSAR 15-SEP-97)
502
503=item *
504
08aa1457 505A few dir-vs-file optimizations result in glob importation being
50610 times faster than using perlglob.exe, and using perlglob.bat is
507only twice as slow as perlglob.exe (GSAR 28-MAY-97)
508
509=item *
510
511Several cleanups prompted by lack of compatible perlglob.exe
512under Borland (GSAR 27-MAY-97)
513
514=item *
515
516Initial version (GSAR 20-FEB-97)
517
518=back
519
520=head1 SEE ALSO
521
522perl
523
dfb634a9
GS
524perlglob.bat
525
163d180b
GS
526Text::ParseWords
527
08aa1457 528=cut
529