This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Convert to a lexical file handle.
[perl5.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
3b825e41 2use 5.006;
b75c8c73 3use strict;
b395063c 4use warnings;
cd68ec93 5use warnings::register;
2aa840f6 6our $VERSION = '1.22';
a0d0e21e 7require Exporter;
6280b799 8require Cwd;
a0d0e21e 9
7bd31527 10#
98dc9551 11# Modified to ensure sub-directory traversal order is not inverted by stack
7bd31527
JH
12# push and pops. That is remains in the same order as in the directory file,
13# or user pre-processing (EG:sorted).
14#
15
f06db76b
AD
16=head1 NAME
17
abfdd623 18File::Find - Traverse a directory tree.
f06db76b
AD
19
20=head1 SYNOPSIS
21
22 use File::Find;
6d355c6e 23 find(\&wanted, @directories_to_search);
f06db76b 24 sub wanted { ... }
237437d0 25
f06db76b 26 use File::Find;
abfdd623 27 finddepth(\&wanted, @directories_to_search);
f06db76b 28 sub wanted { ... }
3cb6de81 29
81793b90
GS
30 use File::Find;
31 find({ wanted => \&process, follow => 1 }, '.');
f06db76b
AD
32
33=head1 DESCRIPTION
34
abfdd623
MS
35These are functions for searching through directory trees doing work
36on each file found similar to the Unix I<find> command. File::Find
37exports two functions, C<find> and C<finddepth>. They work similarly
38but have subtle differences.
39
40=over 4
41
42=item B<find>
43
44 find(\&wanted, @directories);
45 find(\%options, @directories);
46
6eb87ff8
MJD
47C<find()> does a depth-first search over the given C<@directories> in
48the order they are given. For each file or directory found, it calls
49the C<&wanted> subroutine. (See below for details on how to use the
50C<&wanted> function). Additionally, for each directory found, it will
51C<chdir()> into that directory and continue the search, invoking the
52C<&wanted> function on each file or subdirectory in the directory.
abfdd623
MS
53
54=item B<finddepth>
55
56 finddepth(\&wanted, @directories);
57 finddepth(\%options, @directories);
58
b807ffce 59C<finddepth()> works just like C<find()> except that it invokes the
6eb87ff8
MJD
60C<&wanted> function for a directory I<after> invoking it for the
61directory's contents. It does a postorder traversal instead of a
62preorder traversal, working from the bottom of the directory tree up
63where C<find()> works from the top of the tree down.
abfdd623
MS
64
65=back
66
67=head2 %options
68
95e23d19
AL
69The first argument to C<find()> is either a code reference to your
70C<&wanted> function, or a hash reference describing the operations
71to be performed for each file. The
abfdd623 72code reference is described in L<The wanted function> below.
20408e3c 73
81793b90
GS
74Here are the possible keys for the hash:
75
76=over 3
77
78=item C<wanted>
79
abfdd623 80The value should be a code reference. This code reference is
4c90698d
RB
81described in L<The wanted function> below. The C<&wanted> subroutine is
82mandatory.
81793b90
GS
83
84=item C<bydepth>
85
86Reports the name of a directory only AFTER all its entries
95e23d19 87have been reported. Entry point C<finddepth()> is a shortcut for
788c0c32 88specifying C<< { bydepth => 1 } >> in the first argument of C<find()>.
81793b90 89
719c805e
JS
90=item C<preprocess>
91
3555aed3
SP
92The value should be a code reference. This code reference is used to
93preprocess the current directory. The name of the currently processed
95e23d19
AL
94directory is in C<$File::Find::dir>. Your preprocessing function is
95called after C<readdir()>, but before the loop that calls the C<wanted()>
3555aed3
SP
96function. It is called with a list of strings (actually file/directory
97names) and is expected to return a list of strings. The code can be
98used to sort the file/directory names alphabetically, numerically,
99or to filter out directory entries based on their name alone. When
7e47e6ff 100I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
719c805e
JS
101
102=item C<postprocess>
103
3555aed3
SP
104The value should be a code reference. It is invoked just before leaving
105the currently processed directory. It is called in void context with no
106arguments. The name of the current directory is in C<$File::Find::dir>. This
107hook is handy for summarizing a directory, such as calculating its disk
108usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
7e47e6ff 109no-op.
719c805e 110
81793b90
GS
111=item C<follow>
112
113Causes symbolic links to be followed. Since directory trees with symbolic
114links (followed) may contain files more than once and may even have
115cycles, a hash has to be built up with an entry for each file.
116This might be expensive both in space and time for a large
f4d05d5f 117directory tree. See L</follow_fast> and L</follow_skip> below.
81793b90
GS
118If either I<follow> or I<follow_fast> is in effect:
119
120=over 6
121
a45bd81d 122=item *
81793b90 123
54bd407c
RGS
124It is guaranteed that an I<lstat> has been called before the user's
125C<wanted()> function is called. This enables fast file checks involving S<_>.
126Note that this guarantee no longer holds if I<follow> or I<follow_fast>
127are not set.
81793b90 128
a45bd81d 129=item *
81793b90
GS
130
131There is a variable C<$File::Find::fullname> which holds the absolute
615a2b9b
SP
132pathname of the file with all symbolic links resolved. If the link is
133a dangling symbolic link, then fullname will be set to C<undef>.
81793b90
GS
134
135=back
136
204b4d7f
SP
137This is a no-op on Win32.
138
81793b90
GS
139=item C<follow_fast>
140
f10e1564
RM
141This is similar to I<follow> except that it may report some files more
142than once. It does detect cycles, however. Since only symbolic links
143have to be hashed, this is much cheaper both in space and time. If
95e23d19 144processing a file more than once (by the user's C<wanted()> function)
81793b90
GS
145is worse than just taking time, the option I<follow> should be used.
146
204b4d7f
SP
147This is also a no-op on Win32.
148
81793b90
GS
149=item C<follow_skip>
150
151C<follow_skip==1>, which is the default, causes all files which are
152neither directories nor symbolic links to be ignored if they are about
3555aed3 153to be processed a second time. If a directory or a symbolic link
81793b90 154are about to be processed a second time, File::Find dies.
95e23d19 155
81793b90
GS
156C<follow_skip==0> causes File::Find to die if any file is about to be
157processed a second time.
95e23d19 158
81793b90 159C<follow_skip==2> causes File::Find to ignore any duplicate files and
7e47e6ff 160directories but to proceed normally otherwise.
20408e3c 161
80e52b73
JH
162=item C<dangling_symlinks>
163
164If true and a code reference, will be called with the symbolic link
165name and the directory it lives in as arguments. Otherwise, if true
166and warnings are on, warning "symbolic_link_name is a dangling
167symbolic link\n" will be issued. If false, the dangling symbolic link
168will be silently ignored.
f06db76b 169
81793b90
GS
170=item C<no_chdir>
171
95e23d19 172Does not C<chdir()> to each directory as it recurses. The C<wanted()>
81793b90
GS
173function will need to be aware of this, of course. In this case,
174C<$_> will be the same as C<$File::Find::name>.
175
176=item C<untaint>
177
178If find is used in taint-mode (-T command line switch or if EUID != UID
179or if EGID != GID) then internally directory names have to be untainted
7e47e6ff 180before they can be chdir'ed to. Therefore they are checked against a regular
3555aed3
SP
181expression I<untaint_pattern>. Note that all names passed to the user's
182I<wanted()> function are still tainted. If this option is used while
7e47e6ff 183not in taint-mode, C<untaint> is a no-op.
81793b90
GS
184
185=item C<untaint_pattern>
186
187See above. This should be set using the C<qr> quoting operator.
3555aed3 188The default is set to C<qr|^([-+@\w./]+)$|>.
1cffc1dd 189Note that the parentheses are vital.
81793b90
GS
190
191=item C<untaint_skip>
192
3555aed3 193If set, a directory which fails the I<untaint_pattern> is skipped,
7e47e6ff 194including all its sub-directories. The default is to 'die' in such a case.
81793b90
GS
195
196=back
197
abfdd623
MS
198=head2 The wanted function
199
95e23d19
AL
200The C<wanted()> function does whatever verifications you want on
201each file and directory. Note that despite its name, the C<wanted()>
202function is a generic callback function, and does B<not> tell
203File::Find if a file is "wanted" or not. In fact, its return value
204is ignored.
205
206The wanted function takes no arguments but rather does its work
abfdd623
MS
207through a collection of variables.
208
209=over 4
210
f837ebe2 211=item C<$File::Find::dir> is the current directory name,
abfdd623
MS
212
213=item C<$_> is the current filename within that directory
214
f837ebe2 215=item C<$File::Find::name> is the complete pathname to the file.
abfdd623
MS
216
217=back
218
4c767a4e 219The above variables have all been localized and may be changed without
026810a8 220affecting data outside of the wanted function.
4c767a4e 221
95e23d19 222For example, when examining the file F</some/path/foo.ext> you will have:
abfdd623
MS
223
224 $File::Find::dir = /some/path/
225 $_ = foo.ext
226 $File::Find::name = /some/path/foo.ext
227
3555aed3 228You are chdir()'d to C<$File::Find::dir> when the function is called,
abfdd623
MS
229unless C<no_chdir> was specified. Note that when changing to
230directories is in effect the root directory (F</>) is a somewhat
231special case inasmuch as the concatenation of C<$File::Find::dir>,
232C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The
233table below summarizes all variants:
5cf0a2f2
WL
234
235 $File::Find::name $File::Find::dir $_
236 default / / .
237 no_chdir=>0 /etc / etc
238 /etc/x /etc x
abfdd623 239
5cf0a2f2
WL
240 no_chdir=>1 / / /
241 /etc / /etc
242 /etc/x /etc /etc/x
243
244
788c0c32 245When C<follow> or C<follow_fast> are in effect, there is
f10e1564
RM
246also a C<$File::Find::fullname>. The function may set
247C<$File::Find::prune> to prune the tree unless C<bydepth> was
248specified. Unless C<follow> or C<follow_fast> is specified, for
249compatibility reasons (find.pl, find2perl) there are in addition the
250following globals available: C<$File::Find::topdir>,
251C<$File::Find::topdev>, C<$File::Find::topino>,
e7b91b67 252C<$File::Find::topmode> and C<$File::Find::topnlink>.
47a735e8 253
20408e3c 254This library is useful for the C<find2perl> tool, which when fed,
f06db76b
AD
255
256 find2perl / -name .nfs\* -mtime +7 \
81793b90 257 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b
AD
258
259produces something like:
260
261 sub wanted {
c7b9dd21 262 /^\.nfs.*\z/s &&
81793b90 263 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b
AD
264 int(-M _) > 7 &&
265 unlink($_)
266 ||
81793b90 267 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 268 $dev < 0 &&
6280b799 269 ($File::Find::prune = 1);
f06db76b
AD
270 }
271
43dece2a
JH
272Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
273filehandle that caches the information from the preceding
95e23d19 274C<stat()>, C<lstat()>, or filetest.
43dece2a 275
1cffc1dd
JH
276Here's another interesting wanted function. It will find all symbolic
277links that don't resolve:
f06db76b
AD
278
279 sub wanted {
81793b90 280 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 281 }
f06db76b 282
c82ae0bc 283Note that you may mix directories and (non-directory) files in the list of
04cc0b0d 284directories to be searched by the C<wanted()> function.
c82ae0bc
JK
285
286 find(\&wanted, "./foo", "./bar", "./baz/epsilon");
287
288In the example above, no file in F<./baz/> other than F<./baz/epsilon> will be
289evaluated by C<wanted()>.
290
81793b90
GS
291See also the script C<pfind> on CPAN for a nice application of this
292module.
293
cd68ec93
RGS
294=head1 WARNINGS
295
296If you run your program with the C<-w> switch, or if you use the
297C<warnings> pragma, File::Find will report warnings for several weird
298situations. You can disable these warnings by putting the statement
299
300 no warnings 'File::Find';
301
302in the appropriate scope. See L<perllexwarn> for more info about lexical
303warnings.
304
81793b90
GS
305=head1 CAVEAT
306
5fa2bf2b
DD
307=over 2
308
309=item $dont_use_nlink
310
311You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
6cf3b067
T
312force File::Find to always stat directories. This was used for file systems
313that do not have an C<nlink> count matching the number of sub-directories.
314Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
315system) and a couple of others.
5fa2bf2b 316
6cf3b067
T
317You shouldn't need to set this variable, since File::Find should now detect
318such file systems on-the-fly and switch itself to using stat. This works even
319for parts of your file system, like a mounted CD-ROM.
5fa2bf2b 320
6cf3b067 321If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
5fa2bf2b
DD
322
323=item symlinks
324
f10e1564 325Be aware that the option to follow symbolic links can be dangerous.
81793b90
GS
326Depending on the structure of the directory tree (including symbolic
327links to directories) you might traverse a given (physical) directory
3555aed3 328more than once (only if C<follow_fast> is in effect).
81793b90
GS
329Furthermore, deleting or changing files in a symbolically linked directory
330might cause very unpleasant surprises, since you delete or change files
331in an unknown directory.
0530a6c4 332
5fa2bf2b
DD
333=back
334
6eb87ff8
MJD
335=head1 BUGS AND CAVEATS
336
337Despite the name of the C<finddepth()> function, both C<find()> and
338C<finddepth()> perform a depth-first search of the directory
339hierarchy.
340
a85af077
A
341=head1 HISTORY
342
343File::Find used to produce incorrect results if called recursively.
344During the development of perl 5.8 this bug was fixed.
345The first fixed version of File::Find was 1.01.
346
9b33fb8e
DJ
347=head1 SEE ALSO
348
349find, find2perl.
350
f06db76b
AD
351=cut
352
b75c8c73
MS
353our @ISA = qw(Exporter);
354our @EXPORT = qw(find finddepth);
6280b799 355
a0d0e21e 356
81793b90
GS
357use strict;
358my $Is_VMS;
a0b245d5 359my $Is_Win32;
81793b90
GS
360
361require File::Basename;
7e47e6ff 362require File::Spec;
81793b90 363
9f826d6a
BM
364# Should ideally be my() not our() but local() currently
365# refuses to operate on lexicals
366
367our %SLnkSeen;
368our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
719c805e 369 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 370 $pre_process, $post_process, $dangling_symlinks);
81793b90
GS
371
372sub contract_name {
373 my ($cdir,$fn) = @_;
374
7e47e6ff 375 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
81793b90
GS
376
377 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
378
379 $fn =~ s|^\./||;
380
381 my $abs_name= $cdir . $fn;
382
383 if (substr($fn,0,3) eq '../') {
51393fc0 384 1 while $abs_name =~ s!/[^/]*/\.\./+!/!;
81793b90
GS
385 }
386
387 return $abs_name;
388}
389
81793b90
GS
390sub PathCombine($$) {
391 my ($Base,$Name) = @_;
392 my $AbsName;
393
862f843b
NC
394 if (substr($Name,0,1) eq '/') {
395 $AbsName= $Name;
81793b90
GS
396 }
397 else {
862f843b
NC
398 $AbsName= contract_name($Base,$Name);
399 }
81793b90 400
862f843b
NC
401 # (simple) check for recursion
402 my $newlen= length($AbsName);
403 if ($newlen <= length($Base)) {
404 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
405 && $AbsName eq substr($Base,0,$newlen))
406 {
407 return undef;
81793b90
GS
408 }
409 }
410 return $AbsName;
411}
412
413sub Follow_SymLink($) {
414 my ($AbsName) = @_;
415
416 my ($NewName,$DEV, $INO);
417 ($DEV, $INO)= lstat $AbsName;
418
419 while (-l _) {
420 if ($SLnkSeen{$DEV, $INO}++) {
421 if ($follow_skip < 2) {
422 die "$AbsName is encountered a second time";
a0d0e21e
LW
423 }
424 else {
81793b90 425 return undef;
a0d0e21e
LW
426 }
427 }
81793b90
GS
428 $NewName= PathCombine($AbsName, readlink($AbsName));
429 unless(defined $NewName) {
430 if ($follow_skip < 2) {
431 die "$AbsName is a recursive symbolic link";
432 }
433 else {
434 return undef;
a0d0e21e 435 }
81793b90
GS
436 }
437 else {
438 $AbsName= $NewName;
439 }
440 ($DEV, $INO) = lstat($AbsName);
441 return undef unless defined $DEV; # dangling symbolic link
442 }
443
cd68ec93 444 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
7e47e6ff 445 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
81793b90
GS
446 die "$AbsName encountered a second time";
447 }
448 else {
449 return undef;
450 }
451 }
452
453 return $AbsName;
454}
455
17f410f9 456our($dir, $name, $fullname, $prune);
81793b90
GS
457sub _find_dir_symlnk($$$);
458sub _find_dir($$$);
459
7e47e6ff
JH
460# check whether or not a scalar variable is tainted
461# (code straight from the Camel, 3rd ed., page 561)
462sub is_tainted_pp {
463 my $arg = shift;
464 my $nada = substr($arg, 0, 0); # zero-length
465 local $@;
466 eval { eval "# $nada" };
467 return length($@) != 0;
3555aed3 468}
7e47e6ff 469
81793b90
GS
470sub _find_opt {
471 my $wanted = shift;
472 die "invalid top directory" unless defined $_[0];
473
9f826d6a
BM
474 # This function must local()ize everything because callbacks may
475 # call find() or finddepth()
476
477 local %SLnkSeen;
478 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
479 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 480 $pre_process, $post_process, $dangling_symlinks);
4c621faf 481 local($dir, $name, $fullname, $prune);
bc125c03 482 local *_ = \my $a;
9f826d6a 483
a0c9c202 484 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
a1ccf0c4
JM
485 if ($Is_VMS) {
486 # VMS returns this by default in VMS format which just doesn't
487 # work for the rest of this module.
488 $cwd = VMS::Filespec::unixpath($cwd);
489
490 # Apparently this is not expected to have a trailing space.
491 # To attempt to make VMS/UNIX conversions mostly reversable,
492 # a trailing slash is needed. The run-time functions ignore the
493 # resulting double slash, but it causes the perl tests to fail.
494 $cwd =~ s#/\z##;
495
496 # This comes up in upper case now, but should be lower.
497 # In the future this could be exact case, no need to change.
498 }
80e52b73
JH
499 my $cwd_untainted = $cwd;
500 my $check_t_cwd = 1;
501 $wanted_callback = $wanted->{wanted};
502 $bydepth = $wanted->{bydepth};
503 $pre_process = $wanted->{preprocess};
504 $post_process = $wanted->{postprocess};
505 $no_chdir = $wanted->{no_chdir};
a0b245d5
AD
506 $full_check = $Is_Win32 ? 0 : $wanted->{follow};
507 $follow = $Is_Win32 ? 0 :
1bb17459 508 $full_check || $wanted->{follow_fast};
80e52b73
JH
509 $follow_skip = $wanted->{follow_skip};
510 $untaint = $wanted->{untaint};
511 $untaint_pat = $wanted->{untaint_pattern};
512 $untaint_skip = $wanted->{untaint_skip};
513 $dangling_symlinks = $wanted->{dangling_symlinks};
81793b90 514
1cffc1dd 515 # for compatibility reasons (find.pl, find2perl)
9f826d6a 516 local our ($topdir, $topdev, $topino, $topmode, $topnlink);
81793b90
GS
517
518 # a symbolic link to a directory doesn't increase the link count
519 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
3555aed3 520
e7b91b67 521 my ($abs_dir, $Is_Dir);
81793b90
GS
522
523 Proc_Top_Item:
4c621faf 524 foreach my $TOP (@_) {
7e47e6ff 525 my $top_item = $TOP;
2aa840f6 526 $top_item = VMS::Filespec::unixify($top_item) if $Is_VMS;
7e47e6ff 527
3555aed3
SP
528 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
529
a0b245d5
AD
530 if ($Is_Win32) {
531 $top_item =~ s|[/\\]\z||
532 unless $top_item =~ m{^(?:\w:)?[/\\]$};
7e47e6ff
JH
533 }
534 else {
535 $top_item =~ s|/\z|| unless $top_item eq '/';
7e47e6ff
JH
536 }
537
538 $Is_Dir= 0;
539
540 if ($follow) {
541
862f843b
NC
542 if (substr($top_item,0,1) eq '/') {
543 $abs_dir = $top_item;
7e47e6ff 544 }
862f843b
NC
545 elsif ($top_item eq $File::Find::current_dir) {
546 $abs_dir = $cwd;
547 }
548 else { # care about any ../
549 $top_item =~ s/\.dir\z//i if $Is_VMS;
550 $abs_dir = contract_name("$cwd/",$top_item);
7e47e6ff
JH
551 }
552 $abs_dir= Follow_SymLink($abs_dir);
553 unless (defined $abs_dir) {
80e52b73
JH
554 if ($dangling_symlinks) {
555 if (ref $dangling_symlinks eq 'CODE') {
556 $dangling_symlinks->($top_item, $cwd);
557 } else {
cd68ec93 558 warnings::warnif "$top_item is a dangling symbolic link\n";
80e52b73
JH
559 }
560 }
81793b90 561 next Proc_Top_Item;
7e47e6ff
JH
562 }
563
564 if (-d _) {
a1ccf0c4 565 $top_item =~ s/\.dir\z//i if $Is_VMS;
81793b90
GS
566 _find_dir_symlnk($wanted, $abs_dir, $top_item);
567 $Is_Dir= 1;
7e47e6ff
JH
568 }
569 }
81793b90 570 else { # no follow
7e47e6ff
JH
571 $topdir = $top_item;
572 unless (defined $topnlink) {
cd68ec93 573 warnings::warnif "Can't stat $top_item: $!\n";
7e47e6ff
JH
574 next Proc_Top_Item;
575 }
576 if (-d _) {
544ff7a7 577 $top_item =~ s/\.dir\z//i if $Is_VMS;
e7b91b67 578 _find_dir($wanted, $top_item, $topnlink);
81793b90 579 $Is_Dir= 1;
7e47e6ff 580 }
237437d0 581 else {
81793b90 582 $abs_dir= $top_item;
7e47e6ff
JH
583 }
584 }
81793b90 585
7e47e6ff 586 unless ($Is_Dir) {
81793b90 587 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
862f843b 588 ($dir,$_) = ('./', $top_item);
81793b90
GS
589 }
590
7e47e6ff
JH
591 $abs_dir = $dir;
592 if (( $untaint ) && (is_tainted($dir) )) {
593 ( $abs_dir ) = $dir =~ m|$untaint_pat|;
81793b90
GS
594 unless (defined $abs_dir) {
595 if ($untaint_skip == 0) {
7e47e6ff 596 die "directory $dir is still tainted";
81793b90
GS
597 }
598 else {
599 next Proc_Top_Item;
600 }
601 }
7e47e6ff 602 }
81793b90 603
7e47e6ff 604 unless ($no_chdir || chdir $abs_dir) {
cd68ec93 605 warnings::warnif "Couldn't chdir $abs_dir: $!\n";
7e47e6ff
JH
606 next Proc_Top_Item;
607 }
719911cc 608
7e47e6ff 609 $name = $abs_dir . $_; # $File::Find::name
3bb6d3e5 610 $_ = $name if $no_chdir;
719911cc 611
abfdd623 612 { $wanted_callback->() }; # protect against wild "next"
81793b90 613
7e47e6ff 614 }
81793b90 615
7e47e6ff
JH
616 unless ( $no_chdir ) {
617 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
618 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
619 unless (defined $cwd_untainted) {
620 die "insecure cwd in find(depth)";
621 }
622 $check_t_cwd = 0;
623 }
624 unless (chdir $cwd_untainted) {
625 die "Can't cd to $cwd: $!\n";
626 }
627 }
81793b90
GS
628 }
629}
630
631# API:
632# $wanted
633# $p_dir : "parent directory"
634# $nlink : what came back from the stat
635# preconditions:
636# chdir (if not no_chdir) to dir
637
638sub _find_dir($$$) {
639 my ($wanted, $p_dir, $nlink) = @_;
640 my ($CdLvl,$Level) = (0,0);
641 my @Stack;
642 my @filenames;
643 my ($subcount,$sub_nlink);
644 my $SE= [];
645 my $dir_name= $p_dir;
7e47e6ff 646 my $dir_pref;
39e79f6b 647 my $dir_rel = $File::Find::current_dir;
7e47e6ff 648 my $tainted = 0;
5fa2bf2b 649 my $no_nlink;
7e47e6ff 650
a0b245d5
AD
651 if ($Is_Win32) {
652 $dir_pref
653 = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} ? $p_dir : "$p_dir/" );
654 } elsif ($Is_VMS) {
a1ccf0c4
JM
655
656 # VMS is returning trailing .dir on directories
657 # and trailing . on files and symbolic links
658 # in UNIX syntax.
659 #
660
661 $p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.';
662
1e9c9d75 663 $dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" );
7e47e6ff
JH
664 }
665 else {
666 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
7e47e6ff 667 }
81793b90
GS
668
669 local ($dir, $name, $prune, *DIR);
7e47e6ff
JH
670
671 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
81793b90 672 my $udir = $p_dir;
7e47e6ff
JH
673 if (( $untaint ) && (is_tainted($p_dir) )) {
674 ( $udir ) = $p_dir =~ m|$untaint_pat|;
81793b90
GS
675 unless (defined $udir) {
676 if ($untaint_skip == 0) {
677 die "directory $p_dir is still tainted";
678 }
679 else {
680 return;
681 }
237437d0 682 }
a0d0e21e 683 }
8d8eebbf 684 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
cd68ec93 685 warnings::warnif "Can't cd to $udir: $!\n";
81793b90
GS
686 return;
687 }
688 }
7e47e6ff
JH
689
690 # push the starting directory
57e73c4b 691 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
81793b90
GS
692
693 while (defined $SE) {
694 unless ($bydepth) {
3555aed3
SP
695 $dir= $p_dir; # $File::Find::dir
696 $name= $dir_name; # $File::Find::name
7e47e6ff 697 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
81793b90 698 # prune may happen here
7e47e6ff 699 $prune= 0;
abfdd623 700 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 701 next if $prune;
81793b90 702 }
7e47e6ff 703
81793b90 704 # change to that directory
7e47e6ff 705 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
81793b90 706 my $udir= $dir_rel;
7e47e6ff
JH
707 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
708 ( $udir ) = $dir_rel =~ m|$untaint_pat|;
81793b90
GS
709 unless (defined $udir) {
710 if ($untaint_skip == 0) {
862f843b 711 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
7e47e6ff 712 } else { # $untaint_skip == 1
3555aed3 713 next;
81793b90
GS
714 }
715 }
716 }
8d8eebbf 717 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
862f843b
NC
718 warnings::warnif "Can't cd to (" .
719 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
81793b90
GS
720 next;
721 }
722 $CdLvl++;
723 }
724
3555aed3 725 $dir= $dir_name; # $File::Find::dir
81793b90
GS
726
727 # Get the list of files in the current directory.
7e47e6ff 728 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
cd68ec93 729 warnings::warnif "Can't opendir($dir_name): $!\n";
81793b90
GS
730 next;
731 }
732 @filenames = readdir DIR;
733 closedir(DIR);
abfdd623 734 @filenames = $pre_process->(@filenames) if $pre_process;
719c805e 735 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
81793b90 736
98dc9551 737 # default: use whatever was specified
5fa2bf2b
DD
738 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
739 $no_nlink = $avoid_nlink;
740 # if dir has wrong nlink count, force switch to slower stat method
741 $no_nlink = 1 if ($nlink < 2);
742
743 if ($nlink == 2 && !$no_nlink) {
81793b90
GS
744 # This dir has no subdirectories.
745 for my $FN (@filenames) {
a1ccf0c4
JM
746 if ($Is_VMS) {
747 # Big hammer here - Compensate for VMS trailing . and .dir
748 # No win situation until this is changed, but this
749 # will handle the majority of the cases with breaking the fewest
750
751 $FN =~ s/\.dir\z//i;
752 $FN =~ s#\.$## if ($FN ne '.');
753 }
7e47e6ff 754 next if $FN =~ $File::Find::skip_pattern;
81793b90 755
7e47e6ff
JH
756 $name = $dir_pref . $FN; # $File::Find::name
757 $_ = ($no_chdir ? $name : $FN); # $_
abfdd623 758 { $wanted_callback->() }; # protect against wild "next"
81793b90
GS
759 }
760
761 }
762 else {
763 # This dir has subdirectories.
764 $subcount = $nlink - 2;
765
7bd31527
JH
766 # HACK: insert directories at this position. so as to preserve
767 # the user pre-processed ordering of files.
768 # EG: directory traversal is in user sorted order, not at random.
769 my $stack_top = @Stack;
770
81793b90 771 for my $FN (@filenames) {
7e47e6ff 772 next if $FN =~ $File::Find::skip_pattern;
5fa2bf2b 773 if ($subcount > 0 || $no_nlink) {
81793b90
GS
774 # Seen all the subdirs?
775 # check for directoriness.
776 # stat is faster for a file in the current directory
07867069 777 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
81793b90
GS
778
779 if (-d _) {
780 --$subcount;
544ff7a7 781 $FN =~ s/\.dir\z//i if $Is_VMS;
7bd31527
JH
782 # HACK: replace push to preserve dir traversal order
783 #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
784 splice @Stack, $stack_top, 0,
785 [$CdLvl,$dir_name,$FN,$sub_nlink];
81793b90
GS
786 }
787 else {
7e47e6ff
JH
788 $name = $dir_pref . $FN; # $File::Find::name
789 $_= ($no_chdir ? $name : $FN); # $_
abfdd623 790 { $wanted_callback->() }; # protect against wild "next"
81793b90
GS
791 }
792 }
07867069 793 else {
7e47e6ff
JH
794 $name = $dir_pref . $FN; # $File::Find::name
795 $_= ($no_chdir ? $name : $FN); # $_
abfdd623 796 { $wanted_callback->() }; # protect against wild "next"
81793b90
GS
797 }
798 }
799 }
17b275ff
RA
800 }
801 continue {
57e73c4b 802 while ( defined ($SE = pop @Stack) ) {
81793b90
GS
803 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
804 if ($CdLvl > $Level && !$no_chdir) {
7e47e6ff 805 my $tmp;
862f843b 806 if ($Is_VMS) {
d8101854
CB
807 $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']';
808 }
7e47e6ff
JH
809 else {
810 $tmp = join('/',('..') x ($CdLvl-$Level));
811 }
d8101854 812 die "Can't cd to $tmp from $dir_name"
7e47e6ff 813 unless chdir ($tmp);
81793b90
GS
814 $CdLvl = $Level;
815 }
7e47e6ff 816
b242981a
AD
817 if ($Is_Win32) {
818 $dir_name = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$}
819 ? "$p_dir$dir_rel" : "$p_dir/$dir_rel");
3555aed3
SP
820 $dir_pref = "$dir_name/";
821 }
1e9c9d75
CB
822 elsif ($^O eq 'VMS') {
823 if ($p_dir =~ m/[\]>]+$/) {
824 $dir_name = $p_dir;
825 $dir_name =~ s/([\]>]+)$/.$dir_rel$1/;
826 $dir_pref = $dir_name;
827 }
828 else {
829 $dir_name = "$p_dir/$dir_rel";
830 $dir_pref = "$dir_name/";
831 }
832 }
7e47e6ff
JH
833 else {
834 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
835 $dir_pref = "$dir_name/";
836 }
837
719c805e 838 if ( $nlink == -2 ) {
7e47e6ff 839 $name = $dir = $p_dir; # $File::Find::name / dir
39e79f6b 840 $_ = $File::Find::current_dir;
abfdd623 841 $post_process->(); # End-of-directory processing
7e47e6ff
JH
842 }
843 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
844 $name = $dir_name;
862f843b
NC
845 if ( substr($name,-2) eq '/.' ) {
846 substr($name, length($name) == 2 ? -1 : -2) = '';
7e47e6ff 847 }
862f843b
NC
848 $dir = $p_dir;
849 $_ = ($no_chdir ? $dir_name : $dir_rel );
850 if ( substr($_,-2) eq '/.' ) {
851 substr($_, length($_) == 2 ? -1 : -2) = '';
7e47e6ff 852 }
abfdd623 853 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff
JH
854 }
855 else {
856 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
857 last;
858 }
81793b90 859 }
a0d0e21e
LW
860 }
861}
862
81793b90
GS
863
864# API:
865# $wanted
866# $dir_loc : absolute location of a dir
867# $p_dir : "parent directory"
868# preconditions:
869# chdir (if not no_chdir) to dir
870
871sub _find_dir_symlnk($$$) {
7e47e6ff 872 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
81793b90
GS
873 my @Stack;
874 my @filenames;
875 my $new_loc;
7e47e6ff 876 my $updir_loc = $dir_loc; # untainted parent directory
81793b90
GS
877 my $SE = [];
878 my $dir_name = $p_dir;
7e47e6ff
JH
879 my $dir_pref;
880 my $loc_pref;
39e79f6b 881 my $dir_rel = $File::Find::current_dir;
7e47e6ff
JH
882 my $byd_flag; # flag for pending stack entry if $bydepth
883 my $tainted = 0;
884 my $ok = 1;
885
862f843b
NC
886 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
887 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
81793b90
GS
888
889 local ($dir, $name, $fullname, $prune, *DIR);
7e47e6ff
JH
890
891 unless ($no_chdir) {
892 # untaint the topdir
893 if (( $untaint ) && (is_tainted($dir_loc) )) {
894 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
895 # once untainted, $updir_loc is pushed on the stack (as parent directory);
3555aed3
SP
896 # hence, we don't need to untaint the parent directory every time we chdir
897 # to it later
7e47e6ff 898 unless (defined $updir_loc) {
81793b90
GS
899 if ($untaint_skip == 0) {
900 die "directory $dir_loc is still tainted";
901 }
902 else {
903 return;
904 }
905 }
906 }
7e47e6ff
JH
907 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
908 unless ($ok) {
cd68ec93 909 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90
GS
910 return;
911 }
912 }
913
7e47e6ff
JH
914 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;
915
81793b90
GS
916 while (defined $SE) {
917
918 unless ($bydepth) {
7e47e6ff 919 # change (back) to parent directory (always untainted)
704ea872 920 unless ($no_chdir) {
7e47e6ff 921 unless (chdir $updir_loc) {
cd68ec93 922 warnings::warnif "Can't cd to $updir_loc: $!\n";
704ea872
GS
923 next;
924 }
925 }
7e47e6ff
JH
926 $dir= $p_dir; # $File::Find::dir
927 $name= $dir_name; # $File::Find::name
928 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
929 $fullname= $dir_loc; # $File::Find::fullname
81793b90 930 # prune may happen here
7e47e6ff 931 $prune= 0;
704ea872 932 lstat($_); # make sure file tests with '_' work
abfdd623 933 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff 934 next if $prune;
81793b90
GS
935 }
936
937 # change to that directory
7e47e6ff
JH
938 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
939 $updir_loc = $dir_loc;
940 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
3555aed3 941 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
7e47e6ff
JH
942 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
943 unless (defined $updir_loc) {
81793b90
GS
944 if ($untaint_skip == 0) {
945 die "directory $dir_loc is still tainted";
a0d0e21e 946 }
237437d0 947 else {
81793b90 948 next;
237437d0 949 }
a0d0e21e
LW
950 }
951 }
7e47e6ff 952 unless (chdir $updir_loc) {
cd68ec93 953 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90
GS
954 next;
955 }
956 }
957
7e47e6ff 958 $dir = $dir_name; # $File::Find::dir
81793b90
GS
959
960 # Get the list of files in the current directory.
7e47e6ff 961 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
cd68ec93 962 warnings::warnif "Can't opendir($dir_loc): $!\n";
81793b90
GS
963 next;
964 }
965 @filenames = readdir DIR;
966 closedir(DIR);
967
968 for my $FN (@filenames) {
a1ccf0c4
JM
969 if ($Is_VMS) {
970 # Big hammer here - Compensate for VMS trailing . and .dir
971 # No win situation until this is changed, but this
972 # will handle the majority of the cases with breaking the fewest.
973
974 $FN =~ s/\.dir\z//i;
975 $FN =~ s#\.$## if ($FN ne '.');
976 }
7e47e6ff 977 next if $FN =~ $File::Find::skip_pattern;
81793b90
GS
978
979 # follow symbolic links / do an lstat
07867069 980 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90
GS
981
982 # ignore if invalid symlink
3555aed3 983 unless (defined $new_loc) {
fab43c1b 984 if (!defined -l _ && $dangling_symlinks) {
615a2b9b
SP
985 if (ref $dangling_symlinks eq 'CODE') {
986 $dangling_symlinks->($FN, $dir_pref);
987 } else {
988 warnings::warnif "$dir_pref$FN is a dangling symbolic link\n";
989 }
990 }
991
992 $fullname = undef;
993 $name = $dir_pref . $FN;
994 $_ = ($no_chdir ? $name : $FN);
995 { $wanted_callback->() };
996 next;
997 }
7e47e6ff 998
81793b90 999 if (-d _) {
a1ccf0c4
JM
1000 if ($Is_VMS) {
1001 $FN =~ s/\.dir\z//i;
1002 $FN =~ s#\.$## if ($FN ne '.');
1003 $new_loc =~ s/\.dir\z//i;
1004 $new_loc =~ s#\.$## if ($new_loc ne '.');
1005 }
7e47e6ff 1006 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
81793b90
GS
1007 }
1008 else {
3555aed3 1009 $fullname = $new_loc; # $File::Find::fullname
7e47e6ff
JH
1010 $name = $dir_pref . $FN; # $File::Find::name
1011 $_ = ($no_chdir ? $name : $FN); # $_
abfdd623 1012 { $wanted_callback->() }; # protect against wild "next"
81793b90
GS
1013 }
1014 }
1015
81793b90
GS
1016 }
1017 continue {
57e73c4b 1018 while (defined($SE = pop @Stack)) {
7e47e6ff 1019 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
862f843b
NC
1020 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
1021 $dir_pref = "$dir_name/";
1022 $loc_pref = "$dir_loc/";
7e47e6ff
JH
1023 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
1024 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
3555aed3 1025 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
cd68ec93 1026 warnings::warnif "Can't cd to $updir_loc: $!\n";
7e47e6ff
JH
1027 next;
1028 }
1029 }
1030 $fullname = $dir_loc; # $File::Find::fullname
1031 $name = $dir_name; # $File::Find::name
862f843b
NC
1032 if ( substr($name,-2) eq '/.' ) {
1033 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name
7e47e6ff 1034 }
862f843b
NC
1035 $dir = $p_dir; # $File::Find::dir
1036 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
1037 if ( substr($_,-2) eq '/.' ) {
1038 substr($_, length($_) == 2 ? -1 : -2) = '';
7e47e6ff
JH
1039 }
1040
1041 lstat($_); # make sure file tests with '_' work
abfdd623 1042 { $wanted_callback->() }; # protect against wild "next"
7e47e6ff
JH
1043 }
1044 else {
1045 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
1046 last;
1047 }
a0d0e21e
LW
1048 }
1049 }
1050}
1051
81793b90 1052
20408e3c 1053sub wrap_wanted {
81793b90
GS
1054 my $wanted = shift;
1055 if ( ref($wanted) eq 'HASH' ) {
4c90698d
RB
1056 unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) {
1057 die 'no &wanted subroutine given';
1058 }
81793b90
GS
1059 if ( $wanted->{follow} || $wanted->{follow_fast}) {
1060 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
1061 }
1062 if ( $wanted->{untaint} ) {
3555aed3 1063 $wanted->{untaint_pattern} = $File::Find::untaint_pattern
81793b90
GS
1064 unless defined $wanted->{untaint_pattern};
1065 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
1066 }
1067 return $wanted;
1068 }
4c90698d 1069 elsif( ref( $wanted ) eq 'CODE' ) {
81793b90
GS
1070 return { wanted => $wanted };
1071 }
4c90698d
RB
1072 else {
1073 die 'no &wanted subroutine given';
1074 }
a0d0e21e
LW
1075}
1076
20408e3c 1077sub find {
81793b90
GS
1078 my $wanted = shift;
1079 _find_opt(wrap_wanted($wanted), @_);
a0d0e21e
LW
1080}
1081
55d729e4 1082sub finddepth {
81793b90
GS
1083 my $wanted = wrap_wanted(shift);
1084 $wanted->{bydepth} = 1;
1085 _find_opt($wanted, @_);
20408e3c 1086}
6280b799 1087
7e47e6ff
JH
1088# default
1089$File::Find::skip_pattern = qr/^\.{1,2}\z/;
1090$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
1091
6280b799 1092# These are hard-coded for now, but may move to hint files.
10eba763 1093if ($^O eq 'VMS') {
81793b90 1094 $Is_VMS = 1;
7e47e6ff
JH
1095 $File::Find::dont_use_nlink = 1;
1096}
b242981a
AD
1097elsif ($^O eq 'MSWin32') {
1098 $Is_Win32 = 1;
1099}
748a9306 1100
7e47e6ff
JH
1101# this _should_ work properly on all platforms
1102# where File::Find can be expected to work
1103$File::Find::current_dir = File::Spec->curdir || '.';
1104
81793b90 1105$File::Find::dont_use_nlink = 1
b242981a 1106 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $Is_Win32 ||
0c52c6a9 1107 $^O eq 'interix' || $^O eq 'cygwin' || $^O eq 'epoc' || $^O eq 'qnx' ||
1119cb72 1108 $^O eq 'nto';
6280b799 1109
20408e3c
GS
1110# Set dont_use_nlink in your hint file if your system's stat doesn't
1111# report the number of links in a directory as an indication
1112# of the number of files.
1113# See, e.g. hints/machten.sh for MachTen 2.2.
81793b90
GS
1114unless ($File::Find::dont_use_nlink) {
1115 require Config;
1116 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
20408e3c
GS
1117}
1118
3555aed3
SP
1119# We need a function that checks if a scalar is tainted. Either use the
1120# Scalar::Util module's tainted() function or our (slower) pure Perl
7e47e6ff
JH
1121# fallback is_tainted_pp()
1122{
1123 local $@;
1124 eval { require Scalar::Util };
1125 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
1126}
1127
a0d0e21e 11281;