This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
docs patch: 'unicode_strings' doesn't change utf8ness
[perl5.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
3b825e41 2use 5.006;
b75c8c73 3use strict;
b395063c 4use warnings;
cd68ec93 5use warnings::register;
7e8ec876 6our $VERSION = '1.21';
a0d0e21e 7require Exporter;
6280b799 8require Cwd;
a0d0e21e 9
7bd31527 10#
98dc9551 11# Modified to ensure sub-directory traversal order is not inverted by stack
7bd31527
JH
12# pushes and pops. That is, it remains in the same order as in the directory
13# file, or as produced by user pre-processing (e.g. sorted).
14#
15
f06db76b
AD
16=head1 NAME
17
abfdd623 18File::Find - Traverse a directory tree.
f06db76b
AD
19
20=head1 SYNOPSIS
21
22 use File::Find;
6d355c6e 23 find(\&wanted, @directories_to_search);
f06db76b 24 sub wanted { ... }
237437d0 25
f06db76b 26 use File::Find;
abfdd623 27 finddepth(\&wanted, @directories_to_search);
f06db76b 28 sub wanted { ... }
3cb6de81 29
81793b90
GS
30 use File::Find;
31 find({ wanted => \&process, follow => 1 }, '.');
f06db76b
AD
32
33=head1 DESCRIPTION
34
abfdd623
MS
35These are functions for searching through directory trees doing work
36on each file found similar to the Unix I<find> command. File::Find
37exports two functions, C<find> and C<finddepth>. They work similarly
38but have subtle differences.
39
40=over 4
41
42=item B<find>
43
44 find(\&wanted, @directories);
45 find(\%options, @directories);
46
6eb87ff8
MJD
47C<find()> does a depth-first search over the given C<@directories> in
48the order they are given. For each file or directory found, it calls
49the C<&wanted> subroutine. (See below for details on how to use the
50C<&wanted> function). Additionally, for each directory found, it will
51C<chdir()> into that directory and continue the search, invoking the
52C<&wanted> function on each file or subdirectory in the directory.
abfdd623
MS
53
54=item B<finddepth>
55
56 finddepth(\&wanted, @directories);
57 finddepth(\%options, @directories);
58
b807ffce 59C<finddepth()> works just like C<find()> except that it invokes the
6eb87ff8
MJD
60C<&wanted> function for a directory I<after> invoking it for the
61directory's contents. It does a postorder traversal instead of a
62preorder traversal, working from the bottom of the directory tree up
63where C<find()> works from the top of the tree down.
abfdd623
MS
64
65=back
66
67=head2 %options
68
95e23d19
AL
69The first argument to C<find()> is either a code reference to your
70C<&wanted> function, or a hash reference describing the operations
71to be performed for each file. The
abfdd623 72code reference is described in L<The wanted function> below.
20408e3c 73
81793b90
GS
74Here are the possible keys for the hash:
75
76=over 3
77
78=item C<wanted>
79
abfdd623 80The value should be a code reference. This code reference is
4c90698d
RB
81described in L<The wanted function> below. The C<&wanted> subroutine is
82mandatory.
81793b90
GS
83
84=item C<bydepth>
85
86Reports the name of a directory only AFTER all its entries
95e23d19 87have been reported. Entry point C<finddepth()> is a shortcut for
788c0c32 88specifying C<< { bydepth => 1 } >> in the first argument of C<find()>.
81793b90 89
719c805e
JS
90=item C<preprocess>
91
3555aed3
SP
92The value should be a code reference. This code reference is used to
93preprocess the current directory. The name of the currently processed
95e23d19
AL
94directory is in C<$File::Find::dir>. Your preprocessing function is
95called after C<readdir()>, but before the loop that calls the C<wanted()>
3555aed3
SP
96function. It is called with a list of strings (actually file/directory
97names) and is expected to return a list of strings. The code can be
98used to sort the file/directory names alphabetically, numerically,
99or to filter out directory entries based on their name alone. When
7e47e6ff 100I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
719c805e
JS
101
102=item C<postprocess>
103
3555aed3
SP
104The value should be a code reference. It is invoked just before leaving
105the currently processed directory. It is called in void context with no
106arguments. The name of the current directory is in C<$File::Find::dir>. This
107hook is handy for summarizing a directory, such as calculating its disk
108usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
7e47e6ff 109no-op.
719c805e 110
81793b90
GS
111=item C<follow>
112
113Causes symbolic links to be followed. Since directory trees with symbolic
114links (followed) may contain files more than once and may even have
115cycles, a hash has to be built up with an entry for each file.
116This might be expensive both in space and time for a large
f4d05d5f 117directory tree. See L</follow_fast> and L</follow_skip> below.
81793b90
GS
118If either I<follow> or I<follow_fast> is in effect:
119
120=over 6
121
a45bd81d 122=item *
81793b90 123
54bd407c
RGS
124It is guaranteed that an I<lstat> has been called before the user's
125C<wanted()> function is called. This enables fast file checks involving S<_>.
126Note that this guarantee no longer holds if I<follow> or I<follow_fast>
127are not set.
81793b90 128
a45bd81d 129=item *
81793b90
GS
130
131There is a variable C<$File::Find::fullname> which holds the absolute
615a2b9b
SP
132pathname of the file with all symbolic links resolved. If the link is
133a dangling symbolic link, then fullname will be set to C<undef>.
81793b90
GS
134
135=back
136
204b4d7f
SP
137This is a no-op on Win32.
138
81793b90
GS
139=item C<follow_fast>
140
f10e1564
RM
141This is similar to I<follow> except that it may report some files more
142than once. It does detect cycles, however. Since only symbolic links
143have to be hashed, this is much cheaper both in space and time. If
95e23d19 144processing a file more than once (by the user's C<wanted()> function)
81793b90
GS
145is worse than just taking time, the option I<follow> should be used.
146
204b4d7f
SP
147This is also a no-op on Win32.
148
81793b90
GS
149=item C<follow_skip>
150
151C<follow_skip==1>, which is the default, causes all files which are
152neither directories nor symbolic links to be ignored if they are about
3555aed3 153to be processed a second time. If a directory or a symbolic link
81793b90 154is about to be processed a second time, File::Find dies.
95e23d19 155
81793b90
GS
156C<follow_skip==0> causes File::Find to die if any file is about to be
157processed a second time.
95e23d19 158
81793b90 159C<follow_skip==2> causes File::Find to ignore any duplicate files and
7e47e6ff 160directories but to proceed normally otherwise.
20408e3c 161
80e52b73
JH
162=item C<dangling_symlinks>
163
164If true and a code reference, it will be called with the symbolic link
165name and the directory it lives in as arguments. Otherwise, if true
166and warnings are on, warning "symbolic_link_name is a dangling
167symbolic link\n" will be issued. If false, the dangling symbolic link
168will be silently ignored.
f06db76b 169
81793b90
GS
170=item C<no_chdir>
171
95e23d19 172Does not C<chdir()> to each directory as it recurses. The C<wanted()>
81793b90
GS
173function will need to be aware of this, of course. In this case,
174C<$_> will be the same as C<$File::Find::name>.
175
176=item C<untaint>
177
178If find is used in taint-mode (-T command line switch or if EUID != UID
179or if EGID != GID) then internally directory names have to be untainted
7e47e6ff 180before they can be chdir'ed to. Therefore they are checked against a regular
3555aed3
SP
181expression I<untaint_pattern>. Note that all names passed to the user's
182I<wanted()> function are still tainted. If this option is used while
7e47e6ff 183not in taint-mode, C<untaint> is a no-op.
81793b90
GS
184
185=item C<untaint_pattern>
186
187See above. This should be set using the C<qr> quoting operator.
3555aed3 188The default is set to C<qr|^([-+@\w./]+)$|>.
1cffc1dd 189Note that the parentheses are vital.
81793b90
GS
190
191=item C<untaint_skip>
192
3555aed3 193If set, a directory which fails the I<untaint_pattern> is skipped,
7e47e6ff 194including all its sub-directories. The default is to 'die' in such a case.
81793b90
GS
195
196=back
197
abfdd623
MS
198=head2 The wanted function
199
95e23d19
AL
200The C<wanted()> function does whatever verifications you want on
201each file and directory. Note that despite its name, the C<wanted()>
202function is a generic callback function, and does B<not> tell
203File::Find if a file is "wanted" or not. In fact, its return value
204is ignored.
205
206The wanted function takes no arguments but rather does its work
abfdd623
MS
207through a collection of variables.
208
209=over 4
210
f837ebe2 211=item C<$File::Find::dir> is the current directory name,
abfdd623
MS
212
213=item C<$_> is the current filename within that directory
214
f837ebe2 215=item C<$File::Find::name> is the complete pathname to the file.
abfdd623
MS
216
217=back
218
4c767a4e 219The above variables have all been localized and may be changed without
026810a8 220affecting data outside of the wanted function.
4c767a4e 221
95e23d19 222For example, when examining the file F</some/path/foo.ext> you will have:
abfdd623
MS
223
224 $File::Find::dir = /some/path/
225 $_ = foo.ext
226 $File::Find::name = /some/path/foo.ext
227
3555aed3 228You are chdir()'d to C<$File::Find::dir> when the function is called,
abfdd623
MS
229unless C<no_chdir> was specified. Note that when changing to
230directories is in effect the root directory (F</>) is a somewhat
231special case inasmuch as the concatenation of C<$File::Find::dir>,
232C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The
233table below summarizes all variants:
5cf0a2f2
WL
234
235 $File::Find::name $File::Find::dir $_
236 default / / .
237 no_chdir=>0 /etc / etc
238 /etc/x /etc x
abfdd623 239
5cf0a2f2
WL
240 no_chdir=>1 / / /
241 /etc / /etc
242 /etc/x /etc /etc/x
243
244
788c0c32 245When C<follow> or C<follow_fast> are in effect, there is
f10e1564
RM
246also a C<$File::Find::fullname>. The function may set
247C<$File::Find::prune> to prune the tree unless C<bydepth> was
248specified. Unless C<follow> or C<follow_fast> is specified, for
249compatibility reasons (find.pl, find2perl) there are in addition the
250following globals available: C<$File::Find::topdir>,
251C<$File::Find::topdev>, C<$File::Find::topino>,
e7b91b67 252C<$File::Find::topmode> and C<$File::Find::topnlink>.
47a735e8 253
20408e3c 254This library is useful for the C<find2perl> tool, which when fed,
f06db76b
AD
255
256 find2perl / -name .nfs\* -mtime +7 \
81793b90 257 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b
AD
258
259produces something like:
260
261 sub wanted {
c7b9dd21 262 /^\.nfs.*\z/s &&
81793b90 263 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b
AD
264 int(-M _) > 7 &&
265 unlink($_)
266 ||
81793b90 267 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 268 $dev < 0 &&
6280b799 269 ($File::Find::prune = 1);
f06db76b
AD
270 }
271
43dece2a
JH
272Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
273filehandle that caches the information from the preceding
95e23d19 274C<stat()>, C<lstat()>, or filetest.
43dece2a 275
1cffc1dd
JH
276Here's another interesting wanted function. It will find all symbolic
277links that don't resolve:
f06db76b
AD
278
279 sub wanted {
81793b90 280 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 281 }
f06db76b 282
c82ae0bc 283Note that you may mix directories and (non-directory) files in the list of
04cc0b0d 284directories to be searched by the C<wanted()> function.
c82ae0bc
JK
285
286 find(\&wanted, "./foo", "./bar", "./baz/epsilon");
287
288In the example above, no file in F<./baz/> other than F<./baz/epsilon> will be
289evaluated by C<wanted()>.
290
81793b90
GS
291See also the script C<pfind> on CPAN for a nice application of this
292module.
293
cd68ec93
RGS
294=head1 WARNINGS
295
296If you run your program with the C<-w> switch, or if you use the
297C<warnings> pragma, File::Find will report warnings for several weird
298situations. You can disable these warnings by putting the statement
299
300 no warnings 'File::Find';
301
302in the appropriate scope. See L<perllexwarn> for more info about lexical
303warnings.
304
81793b90
GS
305=head1 CAVEAT
306
5fa2bf2b
DD
307=over 2
308
309=item $dont_use_nlink
310
311You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
6cf3b067
T
312force File::Find to always stat directories. This was used for file systems
313that do not have an C<nlink> count matching the number of sub-directories.
314Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
315system) and a couple of others.
5fa2bf2b 316
6cf3b067
T
317You shouldn't need to set this variable, since File::Find should now detect
318such file systems on-the-fly and switch itself to using stat. This works even
319for parts of your file system, like a mounted CD-ROM.
5fa2bf2b 320
6cf3b067 321If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
5fa2bf2b
DD
322
323=item symlinks
324
f10e1564 325Be aware that the option to follow symbolic links can be dangerous.
81793b90
GS
326Depending on the structure of the directory tree (including symbolic
327links to directories) you might traverse a given (physical) directory
3555aed3 328more than once (only if C<follow_fast> is in effect).
81793b90
GS
329Furthermore, deleting or changing files in a symbolically linked directory
330might cause very unpleasant surprises, since you delete or change files
331in an unknown directory.
0530a6c4 332
5fa2bf2b
DD
333=back
334
6eb87ff8
MJD
335=head1 BUGS AND CAVEATS
336
337Despite the name of the C<finddepth()> function, both C<find()> and
338C<finddepth()> perform a depth-first search of the directory
339hierarchy.
340
a85af077
A
341=head1 HISTORY
342
343File::Find used to produce incorrect results if called recursively.
344During the development of perl 5.8 this bug was fixed.
345The first fixed version of File::Find was 1.01.
346
9b33fb8e
DJ
347=head1 SEE ALSO
348
349find, find2perl.
350
f06db76b
AD
351=cut
352
b75c8c73
MS
# Exporter wiring: find() and finddepth() are exported by default.
our @ISA = qw(Exporter);
our @EXPORT = qw(find finddepth);


use strict;

# Platform flags consulted by the traversal code below.
# NOTE(review): they are only declared here; where they get set is not
# visible in this part of the file.
my $Is_VMS;
my $Is_Win32;

require File::Basename;
require File::Spec;

# Should ideally be my() not our() but local() currently
# refuses to operate on lexicals

# Symbolic-link bookkeeping, keyed by ($dev, $ino); see Follow_SymLink().
our %SLnkSeen;

# Per-call option state.  _find_opt() local()izes all of these and fills
# them in from the options hash.
our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
    $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
    $pre_process, $post_process, $dangling_symlinks);
81793b90
GS
371
# contract_name($cdir, $fn)
#
# Combine $fn with the directory portion of $cdir (everything up to the
# last '/') and return the resulting name.
#
#   * If $fn is $File::Find::current_dir, the directory portion itself is
#     returned, without the trailing '/'.
#   * A leading "./" on $fn is dropped.
#   * If $fn starts with "../", "/component/../" sequences in the combined
#     name are collapsed repeatedly until none are left.
sub contract_name {
    my ($cdir, $fn) = @_;

    return substr($cdir, 0, rindex($cdir, '/'))
        if $fn eq $File::Find::current_dir;

    # Keep the trailing '/' so $fn can simply be appended.
    $cdir = substr($cdir, 0, rindex($cdir, '/') + 1);

    $fn =~ s|^\./||;

    my $abs_name = $cdir . $fn;

    if (substr($fn, 0, 3) eq '../') {
        # Each pass removes one "/xxx/../" pair; loop until stable.
        1 while $abs_name =~ s!/[^/]*/\.\./+!/!;
    }

    return $abs_name;
}
389
81793b90
GS
# PathCombine($Base, $Name)
#
# Work out the name that $Name refers to when interpreted relative to
# $Base.  A $Name beginning with '/' is used as is; anything else is
# resolved against the directory part of $Base via contract_name().
#
# Returns the combined name, or undef when the result is $Base itself or
# one of its leading directory components (a simple recursion check).
sub PathCombine($$) {
    my ($Base, $Name) = @_;
    my $AbsName;

    if (substr($Name, 0, 1) eq '/') {
        $AbsName = $Name;
    }
    else {
        $AbsName = contract_name($Base, $Name);
    }

    # (simple) check for recursion
    my $newlen = length($AbsName);
    if ($newlen <= length($Base)) {
        # $AbsName only counts as an ancestor if it ends on a component
        # boundary of $Base (same length, or followed by '/').
        if (($newlen == length($Base) || substr($Base, $newlen, 1) eq '/')
            && $AbsName eq substr($Base, 0, $newlen))
        {
            return undef;
        }
    }
    return $AbsName;
}
412
# Follow_SymLink($AbsName)
#
# Resolve a chain of symbolic links starting at $AbsName, recording every
# link visited in %SLnkSeen (keyed by device and inode).
#
# Returns the final name, or undef when
#   * a link is met a second time, or is recursive according to
#     PathCombine(), and $follow_skip >= 2;
#   * the chain ends in a dangling link;
#   * $full_check is on, the final target was already seen, and
#     $follow_skip allows it to be skipped.
# Dies in the "seen before" / "recursive" cases when $follow_skip forbids
# skipping.
#
# The stat buffer behind the special filehandle "_" is left holding the
# result of the last lstat() made here, so a caller can apply filetests
# to "_" straight after the call.  Do not add stat calls in between.
sub Follow_SymLink($) {
    my ($AbsName) = @_;

    my ($NewName, $DEV, $INO);
    ($DEV, $INO) = lstat $AbsName;

    while (-l _) {
        if ($SLnkSeen{$DEV, $INO}++) {
            if ($follow_skip < 2) {
                die "$AbsName is encountered a second time";
            }
            else {
                return undef;
            }
        }
        $NewName = PathCombine($AbsName, readlink($AbsName));
        unless (defined $NewName) {
            if ($follow_skip < 2) {
                die "$AbsName is a recursive symbolic link";
            }
            else {
                return undef;
            }
        }
        else {
            $AbsName = $NewName;
        }
        ($DEV, $INO) = lstat($AbsName);
        return undef unless defined $DEV;  #  dangling symbolic link
    }

    # With full checking every file is recorded, not just the links.
    if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
        if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
            die "$AbsName encountered a second time";
        }
        else {
            return undef;
        }
    }

    return $AbsName;
}
455
# Package variables handed to the user's callback
# ($File::Find::dir, ::name, ::fullname, ::prune).
our($dir, $name, $fullname, $prune);

# Forward declarations so the prototypes are known before first use.
sub _find_dir_symlnk($$$);
sub _find_dir($$$);
459
7e47e6ff
JH
# check whether or not a scalar variable is tainted
# (code straight from the Camel, 3rd ed., page 561)
#
# Returns true when $arg is tainted, false otherwise.  $@ is localized,
# so the caller's error state is left untouched.
sub is_tainted_pp {
    my $arg = shift;
    my $nada = substr($arg, 0, 0); # zero-length, but carries $arg's taint
    local $@;
    # The string eval compiles only a comment; under taint mode it dies
    # if $nada is tainted, and the outer block eval traps that into $@.
    eval { eval "# $nada" };
    return length($@) != 0;
}
7e47e6ff 469
81793b90
GS
# _find_opt(\%options, @top_items)
#
# Process each top-level item in turn according to the options hash.
# Directories are handed to _find_dir() (or _find_dir_symlnk() when
# symlinks are followed); anything else gets a single call to the wanted
# callback.  Unless no_chdir is set, the working directory is restored to
# the starting directory after each top-level item.
#
# Dies if the first top-level item is undefined, if a directory or the
# starting directory cannot be untainted (and skipping is not allowed),
# or if the starting directory cannot be re-entered.
sub _find_opt {
    my $wanted = shift;
    die "invalid top directory" unless defined $_[0];

    # This function must local()ize everything because callbacks may
    # call find() or finddepth()

    local %SLnkSeen;
    local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
        $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
        $pre_process, $post_process, $dangling_symlinks);
    local($dir, $name, $fullname, $prune);
    local *_ = \my $a;

    my $cwd            = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
    if ($Is_VMS) {
        # VMS returns this by default in VMS format which just doesn't
        # work for the rest of this module.
        $cwd = VMS::Filespec::unixpath($cwd);

        # Apparently this is not expected to have a trailing slash
        # (the substitution below removes one).
        # To attempt to make VMS/UNIX conversions mostly reversible,
        # a trailing slash is needed.  The run-time functions ignore the
        # resulting double slash, but it causes the perl tests to fail.
        $cwd =~ s#/\z##;

        # This comes up in upper case now, but should be lower.
        # In the future this could be exact case, no need to change.
    }
    my $cwd_untainted  = $cwd;
    my $check_t_cwd    = 1;
    $wanted_callback   = $wanted->{wanted};
    $bydepth           = $wanted->{bydepth};
    $pre_process       = $wanted->{preprocess};
    $post_process      = $wanted->{postprocess};
    $no_chdir          = $wanted->{no_chdir};
    $full_check        = $Is_Win32 ? 0 : $wanted->{follow};
    $follow            = $Is_Win32 ? 0 :
                             $full_check || $wanted->{follow_fast};
    $follow_skip       = $wanted->{follow_skip};
    $untaint           = $wanted->{untaint};
    $untaint_pat       = $wanted->{untaint_pattern};
    $untaint_skip      = $wanted->{untaint_skip};
    $dangling_symlinks = $wanted->{dangling_symlinks};

    # for compatibility reasons (find.pl, find2perl)
    local our ($topdir, $topdev, $topino, $topmode, $topnlink);

    # a symbolic link to a directory doesn't increase the link count
    $avoid_nlink      = $follow || $File::Find::dont_use_nlink;

    my ($abs_dir, $Is_Dir);

    Proc_Top_Item:
    foreach my $TOP (@_) {
        my $top_item = $TOP;

        # This (l)stat also primes the "_" stat buffer tested by the
        # "-d _" in the no-follow branch below.
        ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;

        # Drop one trailing directory separator, except on a bare root.
        if ($Is_Win32) {
            $top_item =~ s|[/\\]\z||
              unless $top_item =~ m{^(?:\w:)?[/\\]$};
        }
        else {
            $top_item =~ s|/\z|| unless $top_item eq '/';
        }

        $Is_Dir= 0;

        if ($follow) {

            if (substr($top_item,0,1) eq '/') {
                $abs_dir = $top_item;
            }
            elsif ($top_item eq $File::Find::current_dir) {
                $abs_dir = $cwd;
            }
            else {  # care about any  ../
                $top_item =~ s/\.dir\z//i if $Is_VMS;
                $abs_dir = contract_name("$cwd/",$top_item);
            }
            $abs_dir= Follow_SymLink($abs_dir);
            unless (defined $abs_dir) {
                if ($dangling_symlinks) {
                    if (ref $dangling_symlinks eq 'CODE') {
                        $dangling_symlinks->($top_item, $cwd);
                    } else {
                        warnings::warnif "$top_item is a dangling symbolic link\n";
                    }
                }
                next Proc_Top_Item;
            }

            # "_" still holds the last lstat() done by Follow_SymLink().
            if (-d _) {
                $top_item =~ s/\.dir\z//i if $Is_VMS;
                _find_dir_symlnk($wanted, $abs_dir, $top_item);
                $Is_Dir= 1;
            }
        }
        else { # no follow
            $topdir = $top_item;
            unless (defined $topnlink) {
                warnings::warnif "Can't stat $top_item: $!\n";
                next Proc_Top_Item;
            }
            if (-d _) {
                $top_item =~ s/\.dir\z//i if $Is_VMS;
                _find_dir($wanted, $top_item, $topnlink);
                $Is_Dir= 1;
            }
            else {
                $abs_dir= $top_item;
            }
        }

        # Top-level item is not a directory: report it once, from inside
        # its containing directory unless no_chdir is set.
        unless ($Is_Dir) {
            unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
                ($dir,$_) = ('./', $top_item);
            }

            $abs_dir = $dir;
            if (( $untaint ) && (is_tainted($dir) )) {
                ( $abs_dir ) = $dir =~ m|$untaint_pat|;
                unless (defined $abs_dir) {
                    if ($untaint_skip == 0) {
                        die "directory $dir is still tainted";
                    }
                    else {
                        next Proc_Top_Item;
                    }
                }
            }

            unless ($no_chdir || chdir $abs_dir) {
                warnings::warnif "Couldn't chdir $abs_dir: $!\n";
                next Proc_Top_Item;
            }

            $name = $abs_dir . $_; # $File::Find::name
            $_ = $name if $no_chdir;

            { $wanted_callback->() }; # protect against wild "next"

        }

        # Go back to where we started before handling the next item.
        unless ( $no_chdir ) {
            # The starting directory is untainted at most once per call.
            if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
                ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
                unless (defined $cwd_untainted) {
                    die "insecure cwd in find(depth)";
                }
                $check_t_cwd = 0;
            }
            unless (chdir $cwd_untainted) {
                die "Can't cd to $cwd: $!\n";
            }
        }
    }
}
629
# API:
#  $wanted
#  $p_dir :  "parent directory"
#  $nlink :  what came back from the stat
# preconditions:
#  chdir (if not no_chdir) to dir
#
# Iterative traversal of one directory tree without following symlinks.
# Pending work is kept on @Stack as [$CdLvl, $p_dir, $dir_rel, $nlink]
# entries, where $nlink doubles as a marker:
#   >= 0  a directory still to be scanned (value is its link count)
#   -1    a directory whose name must be reported after its contents
#         (bydepth)
#   -2    a directory whose postprocess hook is due
# $CdLvl counts how many chdir() levels below the start we currently are.

sub _find_dir($$$) {
    my ($wanted, $p_dir, $nlink) = @_;
    my ($CdLvl,$Level) = (0,0);
    my @Stack;
    my @filenames;
    my ($subcount,$sub_nlink);
    my $SE= [];
    my $dir_name= $p_dir;
    my $dir_pref;
    my $dir_rel = $File::Find::current_dir;
    my $tainted = 0;
    my $no_nlink;

    # $dir_pref is what gets prepended to an entry name to form
    # $File::Find::name.
    if ($Is_Win32) {
        $dir_pref
          = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} ? $p_dir : "$p_dir/" );
    } elsif ($Is_VMS) {

        # VMS is returning trailing .dir on directories
        # and trailing . on files and symbolic links
        # in UNIX syntax.
        #

        $p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.';

        $dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" );
    }
    else {
        $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
    }

    local ($dir, $name, $prune, *DIR);

    unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
        my $udir = $p_dir;
        if (( $untaint ) && (is_tainted($p_dir) )) {
            ( $udir ) = $p_dir =~ m|$untaint_pat|;
            unless (defined $udir) {
                if ($untaint_skip == 0) {
                    die "directory $p_dir is still tainted";
                }
                else {
                    return;
                }
            }
        }
        unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
            warnings::warnif "Can't cd to $udir: $!\n";
            return;
        }
    }

    # push the starting directory
    push @Stack,[$CdLvl,$p_dir,$dir_rel,-1]  if  $bydepth;

    while (defined $SE) {
        unless ($bydepth) {
            $dir= $p_dir; # $File::Find::dir
            $name= $dir_name; # $File::Find::name
            $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
            # prune may happen here
            $prune= 0;
            { $wanted_callback->() };   # protect against wild "next"
            next if $prune;
        }

        # change to that directory
        unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
            my $udir= $dir_rel;
            # Once one name was found tainted, every later one is untainted
            # without asking is_tainted() again.
            if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
                ( $udir ) = $dir_rel =~ m|$untaint_pat|;
                unless (defined $udir) {
                    if ($untaint_skip == 0) {
                        die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
                    } else { # $untaint_skip == 1
                        next;
                    }
                }
            }
            unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
                warnings::warnif "Can't cd to (" .
                    ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
                next;
            }
            $CdLvl++;
        }

        $dir= $dir_name; # $File::Find::dir

        # Get the list of files in the current directory.
        unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
            warnings::warnif "Can't opendir($dir_name): $!\n";
            next;
        }
        @filenames = readdir DIR;
        closedir(DIR);
        @filenames = $pre_process->(@filenames) if $pre_process;
        push @Stack,[$CdLvl,$dir_name,"",-2]   if $post_process;

        # default: use whatever was specified
        # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
        $no_nlink = $avoid_nlink;
        # if dir has wrong nlink count, force switch to slower stat method
        $no_nlink = 1 if ($nlink < 2);

        if ($nlink == 2 && !$no_nlink) {
            # This dir has no subdirectories.
            for my $FN (@filenames) {
                if ($Is_VMS) {
                    # Big hammer here - Compensate for VMS trailing . and .dir
                    # No win situation until this is changed, but this
                    # will handle the majority of the cases with breaking the fewest

                    $FN =~ s/\.dir\z//i;
                    $FN =~ s#\.$## if ($FN ne '.');
                }
                next if $FN =~ $File::Find::skip_pattern;

                $name = $dir_pref . $FN; # $File::Find::name
                $_ = ($no_chdir ? $name : $FN); # $_
                { $wanted_callback->() }; # protect against wild "next"
            }

        }
        else {
            # This dir has subdirectories.
            $subcount = $nlink - 2;

            # HACK: insert directories at this position. so as to preserve
            # the user pre-processed ordering of files.
            # EG: directory traversal is in user sorted order, not at random.
            my $stack_top = @Stack;

            for my $FN (@filenames) {
                next if $FN =~ $File::Find::skip_pattern;
                if ($subcount > 0 || $no_nlink) {
                    # Seen all the subdirs?
                    # check for directoriness.
                    # stat is faster for a file in the current directory
                    $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];

                    if (-d _) {
                        --$subcount;
                        $FN =~ s/\.dir\z//i if $Is_VMS;
                        # HACK: replace push to preserve dir traversal order
                        #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
                        splice @Stack, $stack_top, 0,
                                 [$CdLvl,$dir_name,$FN,$sub_nlink];
                    }
                    else {
                        $name = $dir_pref . $FN; # $File::Find::name
                        $_= ($no_chdir ? $name : $FN); # $_
                        { $wanted_callback->() }; # protect against wild "next"
                    }
                }
                else {
                    # All subdirectories accounted for: the rest are
                    # reported without an lstat().
                    $name = $dir_pref . $FN; # $File::Find::name
                    $_= ($no_chdir ? $name : $FN); # $_
                    { $wanted_callback->() }; # protect against wild "next"
                }
            }
        }
    }
    continue {
        # Pop entries until one describes a directory to scan; marker
        # entries (-1 / -2) are handled right here.
        while ( defined ($SE = pop @Stack) ) {
            ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
            if ($CdLvl > $Level && !$no_chdir) {
                # Climb back up to the level this entry was pushed at.
                my $tmp;
                if ($Is_VMS) {
                    $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']';
                }
                else {
                    $tmp = join('/',('..') x ($CdLvl-$Level));
                }
                die "Can't cd to $tmp from $dir_name"
                    unless chdir ($tmp);
                $CdLvl = $Level;
            }

            if ($Is_Win32) {
                $dir_name = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$}
                    ? "$p_dir$dir_rel" : "$p_dir/$dir_rel");
                $dir_pref = "$dir_name/";
            }
            elsif ($^O eq 'VMS') {
                if ($p_dir =~ m/[\]>]+$/) {
                    $dir_name = $p_dir;
                    $dir_name =~ s/([\]>]+)$/.$dir_rel$1/;
                    $dir_pref = $dir_name;
                }
                else {
                    $dir_name = "$p_dir/$dir_rel";
                    $dir_pref = "$dir_name/";
                }
            }
            else {
                $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
                $dir_pref = "$dir_name/";
            }

            if ( $nlink == -2 ) {
                $name = $dir = $p_dir; # $File::Find::name / dir
                $_ = $File::Find::current_dir;
                $post_process->();              # End-of-directory processing
            }
            elsif ( $nlink < 0 ) {  # must be finddepth, report dirname now
                $name = $dir_name;
                # Strip a trailing "/." ("/." alone becomes "/").
                if ( substr($name,-2) eq '/.' ) {
                    substr($name, length($name) == 2 ? -1 : -2) = '';
                }
                $dir = $p_dir;
                $_ = ($no_chdir ? $dir_name : $dir_rel );
                if ( substr($_,-2) eq '/.' ) {
                    substr($_, length($_) == 2 ? -1 : -2) = '';
                }
                { $wanted_callback->() }; # protect against wild "next"
            }
            else {
                # A real directory: leave a -1 marker behind for bydepth
                # reporting, then let the outer loop scan it.
                push @Stack,[$CdLvl,$p_dir,$dir_rel,-1]  if  $bydepth;
                last;
            }
        }
    }
}
861
81793b90
GS
862
# _find_dir_symlnk($wanted, $dir_loc, $p_dir)
#
# Iteratively walks the directory at $dir_loc using an explicit stack,
# resolving every entry through Follow_SymLink() and calling
# $wanted_callback for each file and directory.
#
# Arguments:
#  $wanted  : accepted but not read here; the callback that is actually
#             invoked is $wanted_callback
#  $dir_loc : absolute location of a dir
#  $p_dir   : "parent directory" (the name the dir is reported under)
# Preconditions:
#  chdir (if not no_chdir) to dir
#
# Stack entries have the shape
#  [ $dir_loc, $updir_loc, $p_dir, $dir_rel, $flag ]
# where $flag is -1 for a directory whose report is still pending
# (finddepth) and 1 for a directory that still has to be scanned.

sub _find_dir_symlnk($$$) {
    my ($wanted, $dir_loc, $p_dir) = @_;

    my @pending;                        # explicit traversal stack
    my $entry      = [];                # current stack entry; undef ends the walk
    my $updir_loc  = $dir_loc;          # untainted parent directory
    my $dir_name   = $p_dir;
    my $dir_rel    = $File::Find::current_dir;
    my $dir_pref   = ( $p_dir   eq '/' ? '/' : "$p_dir/" );
    my $loc_pref   = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
    my $marker;                         # flag taken from the popped stack entry
    my $seen_taint = 0;                 # sticky: set once a tainted dir was seen

    local ($dir, $name, $fullname, $prune, *DIR);

    if (!$no_chdir) {
        # Untaint the top directory.  The untainted value is what gets
        # pushed on the stack as parent directory, so it does not have to
        # be untainted again on every later chdir back to it.
        if ( $untaint && is_tainted($dir_loc) ) {
            ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
            if (!defined $updir_loc) {
                die "directory $dir_loc is still tainted" if $untaint_skip == 0;
                return;
            }
        }
        if ( $p_dir ne $File::Find::current_dir && !chdir($updir_loc) ) {
            warnings::warnif "Can't cd to $updir_loc: $!\n";
            return;
        }
    }

    push @pending, [$dir_loc, $updir_loc, $p_dir, $dir_rel, -1] if $bydepth;

    while (defined $entry) {

        if (!$bydepth) {
            # Pre-order: go (back) to the parent directory and report the
            # directory itself before looking inside it.
            if ( !$no_chdir && !chdir($updir_loc) ) {
                warnings::warnif "Can't cd to $updir_loc: $!\n";
                next;
            }
            $dir      = $p_dir;                              # $File::Find::dir
            $name     = $dir_name;                           # $File::Find::name
            $_        = ($no_chdir ? $dir_name : $dir_rel);  # $_
            $fullname = $dir_loc;                            # $File::Find::fullname
            $prune    = 0;                                   # callback may set this
            lstat($_);               # make sure file tests with '_' work
            { $wanted_callback->() };  # bare block guards against a wild "next"
            next if $prune;
        }

        # Enter the directory that is about to be scanned.
        if ( !$no_chdir && $dir_rel ne $File::Find::current_dir ) {
            $updir_loc = $dir_loc;
            if ( $untaint && ( $seen_taint || ($seen_taint = is_tainted($dir_loc)) ) ) {
                # Untaint $dir_loc; the result is what children will carry
                # on the stack as their (untainted) parent directory.
                ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
                if (!defined $updir_loc) {
                    die "directory $dir_loc is still tainted" if $untaint_skip == 0;
                    next;
                }
            }
            if (!chdir($updir_loc)) {
                warnings::warnif "Can't cd to $updir_loc: $!\n";
                next;
            }
        }

        $dir = $dir_name;                                    # $File::Find::dir

        # Read the whole directory up front and close the handle at once.
        if (!opendir(DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir))) {
            warnings::warnif "Can't opendir($dir_loc): $!\n";
            next;
        }
        my @entries = readdir DIR;
        closedir(DIR);

        for my $leaf (@entries) {
            if ($Is_VMS) {
                # Big hammer here - compensate for VMS trailing . and .dir
                $leaf =~ s/\.dir\z//i;
                $leaf =~ s#\.$## if ($leaf ne '.');
            }
            next if $leaf =~ $File::Find::skip_pattern;

            # follow symbolic links / do an lstat
            my $resolved = Follow_SymLink($loc_pref.$leaf);

            # No resolved location: optionally report a dangling link, then
            # still hand the entry to the callback with an undefined
            # $File::Find::fullname.
            if (!defined $resolved) {
                if (!defined -l _ && $dangling_symlinks) {
                    if (ref $dangling_symlinks eq 'CODE') {
                        $dangling_symlinks->($leaf, $dir_pref);
                    }
                    else {
                        warnings::warnif "$dir_pref$leaf is a dangling symbolic link\n";
                    }
                }

                $fullname = undef;
                $name     = $dir_pref . $leaf;
                $_        = ($no_chdir ? $name : $leaf);
                { $wanted_callback->() };
                next;
            }

            if (-d _) {
                # Directories are deferred: queue them for a later pass.
                if ($Is_VMS) {
                    $leaf     =~ s/\.dir\z//i;
                    $leaf     =~ s#\.$## if ($leaf ne '.');
                    $resolved =~ s/\.dir\z//i;
                    $resolved =~ s#\.$## if ($resolved ne '.');
                }
                push @pending, [$resolved, $updir_loc, $dir_name, $leaf, 1];
            }
            else {
                $fullname = $resolved;                       # $File::Find::fullname
                $name     = $dir_pref . $leaf;               # $File::Find::name
                $_        = ($no_chdir ? $name : $leaf);     # $_
                { $wanted_callback->() };  # bare block guards against a wild "next"
            }
        }

    }
    continue {
        # Pick the next stack entry.  Entries flagged < 0 are pending
        # finddepth reports and are handled right here; the first other
        # entry becomes the directory scanned by the next outer iteration.
        while (defined($entry = pop @pending)) {
            ($dir_loc, $updir_loc, $p_dir, $dir_rel, $marker) = @$entry;
            $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
            $dir_pref = "$dir_name/";
            $loc_pref = "$dir_loc/";

            unless ( $marker < 0 ) {
                # Directory still to be scanned; under finddepth leave a
                # marker behind so it is reported after its contents.
                push @pending, [$dir_loc, $updir_loc, $p_dir, $dir_rel, -1] if $bydepth;
                last;
            }

            # must be finddepth, report dirname now
            if ( !$no_chdir && $dir_rel ne $File::Find::current_dir ) {
                # $updir_loc (parent dir) is always untainted
                if (!chdir($updir_loc)) {
                    warnings::warnif "Can't cd to $updir_loc: $!\n";
                    next;
                }
            }
            $fullname = $dir_loc;                            # $File::Find::fullname
            $name     = $dir_name;                           # $File::Find::name
            if ( substr($name,-2) eq '/.' ) {
                # strip the trailing "/." (keep the "/" when $name is just "/.")
                substr($name, length($name) == 2 ? -1 : -2) = '';
            }
            $dir = $p_dir;                                   # $File::Find::dir
            $_   = ($no_chdir ? $dir_name : $dir_rel);       # $_
            if ( substr($_,-2) eq '/.' ) {
                substr($_, length($_) == 2 ? -1 : -2) = '';
            }

            lstat($_);               # make sure file tests with '_' work
            { $wanted_callback->() };  # bare block guards against a wild "next"
        }
    }
}
1050
81793b90 1051
# wrap_wanted($wanted)
#
# Normalises the first argument of find()/finddepth() into an options
# hash reference.
#
#  $wanted : either a code reference (the callback) or a hash reference
#            with at least a 'wanted' key holding a code reference.
#
# Returns a hash reference with these defaults filled in:
#  follow_skip     => 1   when 'follow' or 'follow_fast' is true
#  untaint_pattern => $File::Find::untaint_pattern   when 'untaint' is true
#  untaint_skip    => 0                              when 'untaint' is true
#
# Dies with 'no &wanted subroutine given' when no usable callback is
# supplied.
#
# The defaults are written into a shallow copy, so the caller's own hash
# is never modified and can safely be reused for later calls.
sub wrap_wanted {
    my $wanted = shift;
    my $type   = ref($wanted);

    return { wanted => $wanted } if $type eq 'CODE';

    die 'no &wanted subroutine given' unless $type eq 'HASH';

    unless ( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) {
        die 'no &wanted subroutine given';
    }

    # Work on a private copy so defaults do not leak back to the caller.
    my %opts = %$wanted;

    if ( $opts{follow} || $opts{follow_fast} ) {
        $opts{follow_skip} = 1 unless defined $opts{follow_skip};
    }
    if ( $opts{untaint} ) {
        $opts{untaint_pattern} = $File::Find::untaint_pattern
            unless defined $opts{untaint_pattern};
        $opts{untaint_skip} = 0 unless defined $opts{untaint_skip};
    }
    return \%opts;
}
1075
# find($wanted, @directories)
#
# Public entry point: normalises $wanted (code ref or options hash ref)
# through wrap_wanted() and passes the result, together with the
# remaining arguments, to _find_opt().
sub find {
    my $options = wrap_wanted(shift);
    return _find_opt($options, @_);
}
1080
# finddepth($wanted, @directories)
#
# Same as find(), but with the 'bydepth' option forced on.
#
# wrap_wanted() may hand back the very hash the caller passed in, so
# 'bydepth' is set on a private shallow copy.  Otherwise a caller that
# reuses its options hash for a later find() call would silently get
# depth-first reporting there as well.
sub finddepth {
    my $wanted = { %{ wrap_wanted(shift) }, bydepth => 1 };
    _find_opt($wanted, @_);
}
6280b799 1086
7e47e6ff
JH
# Default patterns: which directory entries are skipped, and which
# directory names are accepted when untainting.
$File::Find::skip_pattern    = qr/^\.{1,2}\z/;
$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;

# Platform flags.  These are hard-coded for now, but may move to hint files.
$Is_VMS   = 1 if $^O eq 'VMS';
$Is_Win32 = 1 if $^O eq 'MSWin32';

# this _should_ work properly on all platforms
# where File::Find can be expected to work
$File::Find::current_dir = File::Spec->curdir || '.';

# Platforms on which dont_use_nlink is always switched on.
$File::Find::dont_use_nlink = 1
    if $Is_Win32
    || grep { $^O eq $_ } qw(VMS os2 dos amigaos interix cygwin epoc qnx nto);

# Set dont_use_nlink in your hint file if your system's stat doesn't
# report the number of links in a directory as an indication
# of the number of files.
# See, e.g. hints/machten.sh for MachTen 2.2.
if (!$File::Find::dont_use_nlink) {
    require Config;
    if ($Config::Config{'dont_use_nlink'}) {
        $File::Find::dont_use_nlink = 1;
    }
}

# We need a function that checks if a scalar is tainted. Either use the
# Scalar::Util module's tainted() function or our (slower) pure Perl
# fallback is_tainted_pp()
{
    local $@;
    eval { require Scalar::Util };
    if ($@) {
        *is_tainted = \&is_tainted_pp;
    }
    else {
        *is_tainted = \&Scalar::Util::tainted;
    }
}

1;