This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
File::Find patch patches from Thomas Wegner.
[perl5.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
3b825e41 2use 5.006;
b75c8c73 3use strict;
b395063c 4use warnings;
cd68ec93
RGS
5use warnings::register;
6our $VERSION = '1.04';
a0d0e21e 7require Exporter;
6280b799 8require Cwd;
a0d0e21e 9
f06db76b
AD
10=head1 NAME
11
12find - traverse a file tree
13
14finddepth - traverse a directory structure depth-first
15
16=head1 SYNOPSIS
17
18 use File::Find;
81793b90 19 find(\&wanted, '/foo', '/bar');
f06db76b 20 sub wanted { ... }
237437d0 21
f06db76b 22 use File::Find;
81793b90 23 finddepth(\&wanted, '/foo', '/bar');
f06db76b 24 sub wanted { ... }
3cb6de81 25
81793b90
GS
26 use File::Find;
27 find({ wanted => \&process, follow => 1 }, '.');
f06db76b
AD
28
29=head1 DESCRIPTION
30
20408e3c 31The first argument to find() is either a hash reference describing the
81793b90 32operations to be performed for each file, or a code reference.
20408e3c 33
81793b90
GS
34Here are the possible keys for the hash:
35
36=over 3
37
38=item C<wanted>
39
40The value should be a code reference. This code reference is called
41I<the wanted() function> below.
42
43=item C<bydepth>
44
45Reports the name of a directory only AFTER all its entries
46have been reported. Entry point finddepth() is a shortcut for
f801979b 47specifying C<{ bydepth =E<gt> 1 }> in the first argument of find().
81793b90 48
719c805e
JS
49=item C<preprocess>
50
7e47e6ff
JH
51The value should be a code reference. This code reference is used to
52preprocess the current directory. The name of currently processed
53directory is in $File::Find::dir. Your preprocessing function is
54called after readdir() but before the loop that calls the wanted()
55function. It is called with a list of strings (actually file/directory
56names) and is expected to return a list of strings. The code can be
57used to sort the file/directory names alphabetically, numerically,
58or to filter out directory entries based on their name alone. When
59I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
719c805e
JS
60
61=item C<postprocess>
62
7e47e6ff
JH
63The value should be a code reference. It is invoked just before leaving
64the currently processed directory. It is called in void context with no
65arguments. The name of the current directory is in $File::Find::dir. This
66hook is handy for summarizing a directory, such as calculating its disk
3fa6e24b 67usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
7e47e6ff 68no-op.
719c805e 69
81793b90
GS
70=item C<follow>
71
72Causes symbolic links to be followed. Since directory trees with symbolic
73links (followed) may contain files more than once and may even have
74cycles, a hash has to be built up with an entry for each file.
75This might be expensive both in space and time for a large
76directory tree. See I<follow_fast> and I<follow_skip> below.
77If either I<follow> or I<follow_fast> is in effect:
78
79=over 6
80
a45bd81d 81=item *
81793b90 82
f10e1564 83It is guaranteed that an I<lstat> has been called before the user's
81793b90
GS
84I<wanted()> function is called. This enables fast file checks involving S< _>.
85
a45bd81d 86=item *
81793b90
GS
87
88There is a variable C<$File::Find::fullname> which holds the absolute
89pathname of the file with all symbolic links resolved
90
91=back
92
93=item C<follow_fast>
94
f10e1564
RM
95This is similar to I<follow> except that it may report some files more
96than once. It does detect cycles, however. Since only symbolic links
97have to be hashed, this is much cheaper both in space and time. If
98processing a file more than once (by the user's I<wanted()> function)
81793b90
GS
99is worse than just taking time, the option I<follow> should be used.
100
101=item C<follow_skip>
102
103C<follow_skip==1>, which is the default, causes all files which are
104neither directories nor symbolic links to be ignored if they are about
105to be processed a second time. If a directory or a symbolic link
106are about to be processed a second time, File::Find dies.
107C<follow_skip==0> causes File::Find to die if any file is about to be
108processed a second time.
109C<follow_skip==2> causes File::Find to ignore any duplicate files and
7e47e6ff 110directories but to proceed normally otherwise.
20408e3c 111
80e52b73
JH
112=item C<dangling_symlinks>
113
114If true and a code reference, will be called with the symbolic link
115name and the directory it lives in as arguments. Otherwise, if true
116and warnings are on, warning "symbolic_link_name is a dangling
117symbolic link\n" will be issued. If false, the dangling symbolic link
118will be silently ignored.
f06db76b 119
81793b90
GS
120=item C<no_chdir>
121
122Does not C<chdir()> to each directory as it recurses. The wanted()
123function will need to be aware of this, of course. In this case,
124C<$_> will be the same as C<$File::Find::name>.
125
126=item C<untaint>
127
128If find is used in taint-mode (-T command line switch or if EUID != UID
129or if EGID != GID) then internally directory names have to be untainted
7e47e6ff
JH
130before they can be chdir'ed to. Therefore they are checked against a regular
131expression I<untaint_pattern>. Note that all names passed to the user's
132I<wanted()> function are still tainted. If this option is used while
133not in taint-mode, C<untaint> is a no-op.
81793b90
GS
134
135=item C<untaint_pattern>
136
137See above. This should be set using the C<qr> quoting operator.
138The default is set to C<qr|^([-+@\w./]+)$|>.
1cffc1dd 139Note that the parentheses are vital.
81793b90
GS
140
141=item C<untaint_skip>
142
7e47e6ff
JH
143If set, a directory which fails the I<untaint_pattern> is skipped,
144including all its sub-directories. The default is to 'die' in such a case.
81793b90
GS
145
146=back
147
148The wanted() function does whatever verifications you want.
149C<$File::Find::dir> contains the current directory name, and C<$_> the
150current filename within that directory. C<$File::Find::name> contains
f10e1564
RM
151the complete pathname to the file. You are chdir()'d to
152C<$File::Find::dir> when the function is called, unless C<no_chdir>
5cf0a2f2
WL
153was specified. Note that when changing to directories is in effect
154the root directory (F</>) is a somewhat special case inasmuch as the
155concatenation of C<$File::Find::dir>, C<'/'> and C<$_> is not literally
156equal to C<$File::Find::name>. The table below summarizes all variants:
157
158 $File::Find::name $File::Find::dir $_
159 default / / .
160 no_chdir=>0 /etc / etc
161 /etc/x /etc x
162
163 no_chdir=>1 / / /
164 /etc / /etc
165 /etc/x /etc /etc/x
166
167
168When <follow> or <follow_fast> are in effect, there is
f10e1564
RM
169also a C<$File::Find::fullname>. The function may set
170C<$File::Find::prune> to prune the tree unless C<bydepth> was
171specified. Unless C<follow> or C<follow_fast> is specified, for
172compatibility reasons (find.pl, find2perl) there are in addition the
173following globals available: C<$File::Find::topdir>,
174C<$File::Find::topdev>, C<$File::Find::topino>,
e7b91b67 175C<$File::Find::topmode> and C<$File::Find::topnlink>.
47a735e8 176
20408e3c 177This library is useful for the C<find2perl> tool, which when fed,
f06db76b
AD
178
179 find2perl / -name .nfs\* -mtime +7 \
81793b90 180 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b
AD
181
182produces something like:
183
184 sub wanted {
c7b9dd21 185 /^\.nfs.*\z/s &&
81793b90 186 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b
AD
187 int(-M _) > 7 &&
188 unlink($_)
189 ||
81793b90 190 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 191 $dev < 0 &&
6280b799 192 ($File::Find::prune = 1);
f06db76b
AD
193 }
194
43dece2a
JH
195Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
196filehandle that caches the information from the preceding
197stat(), lstat(), or filetest.
198
1cffc1dd
JH
199Here's another interesting wanted function. It will find all symbolic
200links that don't resolve:
f06db76b
AD
201
202 sub wanted {
81793b90 203 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 204 }
f06db76b 205
81793b90
GS
206See also the script C<pfind> on CPAN for a nice application of this
207module.
208
cd68ec93
RGS
209=head1 WARNINGS
210
211If you run your program with the C<-w> switch, or if you use the
212C<warnings> pragma, File::Find will report warnings for several weird
213situations. You can disable these warnings by putting the statement
214
215 no warnings 'File::Find';
216
217in the appropriate scope. See L<perllexwarn> for more info about lexical
218warnings.
219
81793b90
GS
220=head1 CAVEAT
221
5fa2bf2b
DD
222=over 2
223
224=item $dont_use_nlink
225
226You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
6cf3b067
T
227force File::Find to always stat directories. This was used for file systems
228that do not have an C<nlink> count matching the number of sub-directories.
229Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
230system) and a couple of others.
5fa2bf2b 231
6cf3b067
T
232You shouldn't need to set this variable, since File::Find should now detect
233such file systems on-the-fly and switch itself to using stat. This works even
234for parts of your file system, like a mounted CD-ROM.
5fa2bf2b 235
6cf3b067 236If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
5fa2bf2b
DD
237
238=item symlinks
239
f10e1564 240Be aware that the option to follow symbolic links can be dangerous.
81793b90
GS
241Depending on the structure of the directory tree (including symbolic
242links to directories) you might traverse a given (physical) directory
243more than once (only if C<follow_fast> is in effect).
244Furthermore, deleting or changing files in a symbolically linked directory
245might cause very unpleasant surprises, since you delete or change files
246in an unknown directory.
0530a6c4 247
5fa2bf2b
DD
248=back
249
7e47e6ff
JH
250=head1 NOTES
251
252=over 4
253
254=item *
255
256Mac OS (Classic) users should note a few differences:
257
258=over 4
259
260=item *
261
262The path separator is ':', not '/', and the current directory is denoted
263as ':', not '.'. You should be careful about specifying relative pathnames.
264While a full path always begins with a volume name, a relative pathname
265should always begin with a ':'. If specifying a volume name only, a
266trailing ':' is required.
267
268=item *
269
270C<$File::Find::dir> is guaranteed to end with a ':'. If C<$_>
271contains the name of a directory, that name may or may not end with a
272':'. Likewise, C<$File::Find::name>, which contains the complete
273pathname to that directory, and C<$File::Find::fullname>, which holds
274the absolute pathname of that directory with all symbolic links resolved,
275may or may not end with a ':'.
276
277=item *
278
279The default C<untaint_pattern> (see above) on Mac OS is set to
280C<qr|^(.+)$|>. Note that the parentheses are vital.
281
282=item *
283
284The invisible system file "Icon\015" is ignored. While this file may
285appear in every directory, there are some more invisible system files
286on every volume, which are all located at the volume root level (i.e.
287"MacintoshHD:"). These system files are B<not> excluded automatically.
288Your filter may use the following code to recognize invisible files or
289directories (requires Mac::Files):
290
291 use Mac::Files;
292
293 # invisible() -- returns 1 if file/directory is invisible,
1cffc1dd 294 # 0 if it's visible or undef if an error occurred
7e47e6ff
JH
295
296 sub invisible($) {
297 my $file = shift;
298 my ($fileCat, $fileInfo);
299 my $invisible_flag = 1 << 14;
300
301 if ( $fileCat = FSpGetCatInfo($file) ) {
302 if ($fileInfo = $fileCat->ioFlFndrInfo() ) {
303 return (($fileInfo->fdFlags & $invisible_flag) && 1);
304 }
305 }
306 return undef;
307 }
308
309Generally, invisible files are system files, unless an odd application
310decides to use invisible files for its own purposes. To distinguish
311such files from system files, you have to look at the B<type> and B<creator>
312file attributes. The MacPerl built-in functions C<GetFileInfo(FILE)> and
313C<SetFileInfo(CREATOR, TYPE, FILES)> offer access to these attributes
314(see MacPerl.pm for details).
315
316Files that appear on the desktop actually reside in an (hidden) directory
317named "Desktop Folder" on the particular disk volume. Note that, although
318all desktop files appear to be on the same "virtual" desktop, each disk
319volume actually maintains its own "Desktop Folder" directory.
320
321=back
322
323=back
0530a6c4 324
a85af077
A
325=head1 HISTORY
326
327File::Find used to produce incorrect results if called recursively.
328During the development of perl 5.8 this bug was fixed.
329The first fixed version of File::Find was 1.01.
330
f06db76b
AD
331=cut
332
b75c8c73
MS
333our @ISA = qw(Exporter);
334our @EXPORT = qw(find finddepth);
6280b799 335
a0d0e21e 336
81793b90
GS
337use strict;
338my $Is_VMS;
7e47e6ff 339my $Is_MacOS;
81793b90
GS
340
341require File::Basename;
7e47e6ff 342require File::Spec;
81793b90 343
9f826d6a
BM
344# Should ideally be my() not our() but local() currently
345# refuses to operate on lexicals
346
347our %SLnkSeen;
348our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
719c805e 349 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 350 $pre_process, $post_process, $dangling_symlinks);
81793b90
GS
351
352sub contract_name {
353 my ($cdir,$fn) = @_;
354
7e47e6ff 355 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
81793b90
GS
356
357 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
358
359 $fn =~ s|^\./||;
360
361 my $abs_name= $cdir . $fn;
362
363 if (substr($fn,0,3) eq '../') {
fecbda2b 364 1 while $abs_name =~ s!/[^/]*/\.\./!/!;
81793b90
GS
365 }
366
367 return $abs_name;
368}
369
7e47e6ff
JH
370# return the absolute name of a directory or file
371sub contract_name_Mac {
372 my ($cdir,$fn) = @_;
373 my $abs_name;
374
375 if ($fn =~ /^(:+)(.*)$/) { # valid pathname starting with a ':'
376
377 my $colon_count = length ($1);
378 if ($colon_count == 1) {
379 $abs_name = $cdir . $2;
380 return $abs_name;
381 }
382 else {
383 # need to move up the tree, but
384 # only if it's not a volume name
385 for (my $i=1; $i<$colon_count; $i++) {
386 unless ($cdir =~ /^[^:]+:$/) { # volume name
387 $cdir =~ s/[^:]+:$//;
388 }
389 else {
390 return undef;
391 }
392 }
393 $abs_name = $cdir . $2;
394 return $abs_name;
395 }
396
397 }
398 else {
399
400 # $fn may be a valid path to a directory or file or (dangling)
401 # symlink, without a leading ':'
402 if ( (-e $fn) || (-l $fn) ) {
403 if ($fn =~ /^[^:]+:/) { # a volume name like DataHD:*
404 return $fn; # $fn is already an absolute path
405 }
406 else {
407 $abs_name = $cdir . $fn;
408 return $abs_name;
409 }
410 }
411 else { # argh!, $fn is not a valid directory/file
412 return undef;
413 }
414 }
415}
81793b90
GS
416
417sub PathCombine($$) {
418 my ($Base,$Name) = @_;
419 my $AbsName;
420
7e47e6ff
JH
421 if ($Is_MacOS) {
422 # $Name is the resolved symlink (always a full path on MacOS),
423 # i.e. there's no need to call contract_name_Mac()
424 $AbsName = $Name;
425
426 # (simple) check for recursion
427 if ( ( $Base =~ /^$AbsName/) && (-d $AbsName) ) { # recursion
428 return undef;
429 }
81793b90
GS
430 }
431 else {
7e47e6ff
JH
432 if (substr($Name,0,1) eq '/') {
433 $AbsName= $Name;
434 }
435 else {
436 $AbsName= contract_name($Base,$Name);
437 }
81793b90 438
7e47e6ff
JH
439 # (simple) check for recursion
440 my $newlen= length($AbsName);
441 if ($newlen <= length($Base)) {
442 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
443 && $AbsName eq substr($Base,0,$newlen))
444 {
445 return undef;
446 }
81793b90
GS
447 }
448 }
449 return $AbsName;
450}
451
452sub Follow_SymLink($) {
453 my ($AbsName) = @_;
454
455 my ($NewName,$DEV, $INO);
456 ($DEV, $INO)= lstat $AbsName;
457
458 while (-l _) {
459 if ($SLnkSeen{$DEV, $INO}++) {
460 if ($follow_skip < 2) {
461 die "$AbsName is encountered a second time";
a0d0e21e
LW
462 }
463 else {
81793b90 464 return undef;
a0d0e21e
LW
465 }
466 }
81793b90
GS
467 $NewName= PathCombine($AbsName, readlink($AbsName));
468 unless(defined $NewName) {
469 if ($follow_skip < 2) {
470 die "$AbsName is a recursive symbolic link";
471 }
472 else {
473 return undef;
a0d0e21e 474 }
81793b90
GS
475 }
476 else {
477 $AbsName= $NewName;
478 }
479 ($DEV, $INO) = lstat($AbsName);
480 return undef unless defined $DEV; # dangling symbolic link
481 }
482
cd68ec93 483 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
7e47e6ff 484 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
81793b90
GS
485 die "$AbsName encountered a second time";
486 }
487 else {
488 return undef;
489 }
490 }
491
492 return $AbsName;
493}
494
17f410f9 495our($dir, $name, $fullname, $prune);
81793b90
GS
496sub _find_dir_symlnk($$$);
497sub _find_dir($$$);
498
7e47e6ff
JH
499# check whether or not a scalar variable is tainted
500# (code straight from the Camel, 3rd ed., page 561)
501sub is_tainted_pp {
502 my $arg = shift;
503 my $nada = substr($arg, 0, 0); # zero-length
504 local $@;
505 eval { eval "# $nada" };
506 return length($@) != 0;
507}
508
81793b90
GS
509sub _find_opt {
510 my $wanted = shift;
511 die "invalid top directory" unless defined $_[0];
512
9f826d6a
BM
513 # This function must local()ize everything because callbacks may
514 # call find() or finddepth()
515
516 local %SLnkSeen;
517 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
518 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
80e52b73 519 $pre_process, $post_process, $dangling_symlinks);
9f826d6a
BM
520 local($dir, $name, $fullname, $prune);
521
a0c9c202 522 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
80e52b73
JH
523 my $cwd_untainted = $cwd;
524 my $check_t_cwd = 1;
525 $wanted_callback = $wanted->{wanted};
526 $bydepth = $wanted->{bydepth};
527 $pre_process = $wanted->{preprocess};
528 $post_process = $wanted->{postprocess};
529 $no_chdir = $wanted->{no_chdir};
530 $full_check = $wanted->{follow};
531 $follow = $full_check || $wanted->{follow_fast};
532 $follow_skip = $wanted->{follow_skip};
533 $untaint = $wanted->{untaint};
534 $untaint_pat = $wanted->{untaint_pattern};
535 $untaint_skip = $wanted->{untaint_skip};
536 $dangling_symlinks = $wanted->{dangling_symlinks};
81793b90 537
1cffc1dd 538 # for compatibility reasons (find.pl, find2perl)
9f826d6a 539 local our ($topdir, $topdev, $topino, $topmode, $topnlink);
81793b90
GS
540
541 # a symbolic link to a directory doesn't increase the link count
542 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
543
e7b91b67 544 my ($abs_dir, $Is_Dir);
81793b90
GS
545
546 Proc_Top_Item:
547 foreach my $TOP (@_) {
7e47e6ff
JH
548 my $top_item = $TOP;
549
550 if ($Is_MacOS) {
551 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
552 $top_item = ":$top_item"
3fa6e24b 553 if ( (-d _) && ( $top_item !~ /:/ ) );
7e47e6ff
JH
554 }
555 else {
556 $top_item =~ s|/\z|| unless $top_item eq '/';
557 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
558 }
559
560 $Is_Dir= 0;
561
562 if ($follow) {
563
564 if ($Is_MacOS) {
565 $cwd = "$cwd:" unless ($cwd =~ /:$/); # for safety
566
567 if ($top_item eq $File::Find::current_dir) {
568 $abs_dir = $cwd;
569 }
570 else {
571 $abs_dir = contract_name_Mac($cwd, $top_item);
572 unless (defined $abs_dir) {
cd68ec93 573 warnings::warnif "Can't determine absolute path for $top_item (No such file or directory)\n";
7e47e6ff
JH
574 next Proc_Top_Item;
575 }
576 }
577
578 }
579 else {
580 if (substr($top_item,0,1) eq '/') {
581 $abs_dir = $top_item;
582 }
583 elsif ($top_item eq $File::Find::current_dir) {
584 $abs_dir = $cwd;
585 }
586 else { # care about any ../
587 $abs_dir = contract_name("$cwd/",$top_item);
588 }
589 }
590 $abs_dir= Follow_SymLink($abs_dir);
591 unless (defined $abs_dir) {
80e52b73
JH
592 if ($dangling_symlinks) {
593 if (ref $dangling_symlinks eq 'CODE') {
594 $dangling_symlinks->($top_item, $cwd);
595 } else {
cd68ec93 596 warnings::warnif "$top_item is a dangling symbolic link\n";
80e52b73
JH
597 }
598 }
81793b90 599 next Proc_Top_Item;
7e47e6ff
JH
600 }
601
602 if (-d _) {
81793b90
GS
603 _find_dir_symlnk($wanted, $abs_dir, $top_item);
604 $Is_Dir= 1;
7e47e6ff
JH
605 }
606 }
81793b90 607 else { # no follow
7e47e6ff
JH
608 $topdir = $top_item;
609 unless (defined $topnlink) {
cd68ec93 610 warnings::warnif "Can't stat $top_item: $!\n";
7e47e6ff
JH
611 next Proc_Top_Item;
612 }
613 if (-d _) {
c7b9dd21 614 $top_item =~ s/\.dir\z// if $Is_VMS;
e7b91b67 615 _find_dir($wanted, $top_item, $topnlink);
81793b90 616 $Is_Dir= 1;
7e47e6ff 617 }
237437d0 618 else {
81793b90 619 $abs_dir= $top_item;
7e47e6ff
JH
620 }
621 }
81793b90 622
7e47e6ff 623 unless ($Is_Dir) {
81793b90 624 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
7e47e6ff
JH
625 if ($Is_MacOS) {
626 ($dir,$_) = (':', $top_item); # $File::Find::dir, $_
627 }
628 else {
629 ($dir,$_) = ('./', $top_item);
630 }
81793b90
GS
631 }
632
7e47e6ff
JH
633 $abs_dir = $dir;
634 if (( $untaint ) && (is_tainted($dir) )) {
635 ( $abs_dir ) = $dir =~ m|$untaint_pat|;
81793b90
GS
636 unless (defined $abs_dir) {
637 if ($untaint_skip == 0) {
7e47e6ff 638 die "directory $dir is still tainted";
81793b90
GS
639 }
640 else {
641 next Proc_Top_Item;
642 }
643 }
7e47e6ff 644 }
81793b90 645
7e47e6ff 646 unless ($no_chdir || chdir $abs_dir) {
cd68ec93 647 warnings::warnif "Couldn't chdir $abs_dir: $!\n";
7e47e6ff
JH
648 next Proc_Top_Item;
649 }
719911cc 650
7e47e6ff 651 $name = $abs_dir . $_; # $File::Find::name
719911cc 652
7e47e6ff 653 { &$wanted_callback }; # protect against wild "next"
81793b90 654
7e47e6ff 655 }
81793b90 656
7e47e6ff
JH
657 unless ( $no_chdir ) {
658 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
659 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
660 unless (defined $cwd_untainted) {
661 die "insecure cwd in find(depth)";
662 }
663 $check_t_cwd = 0;
664 }
665 unless (chdir $cwd_untainted) {
666 die "Can't cd to $cwd: $!\n";
667 }
668 }
81793b90
GS
669 }
670}
671
672# API:
673# $wanted
674# $p_dir : "parent directory"
675# $nlink : what came back from the stat
676# preconditions:
677# chdir (if not no_chdir) to dir
678
679sub _find_dir($$$) {
680 my ($wanted, $p_dir, $nlink) = @_;
681 my ($CdLvl,$Level) = (0,0);
682 my @Stack;
683 my @filenames;
684 my ($subcount,$sub_nlink);
685 my $SE= [];
686 my $dir_name= $p_dir;
7e47e6ff 687 my $dir_pref;
39e79f6b 688 my $dir_rel = $File::Find::current_dir;
7e47e6ff 689 my $tainted = 0;
5fa2bf2b 690 my $no_nlink;
7e47e6ff
JH
691
692 if ($Is_MacOS) {
693 $dir_pref= ($p_dir =~ /:$/) ? $p_dir : "$p_dir:"; # preface
7e47e6ff
JH
694 }
695 else {
696 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
7e47e6ff 697 }
81793b90
GS
698
699 local ($dir, $name, $prune, *DIR);
7e47e6ff
JH
700
701 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
81793b90 702 my $udir = $p_dir;
7e47e6ff
JH
703 if (( $untaint ) && (is_tainted($p_dir) )) {
704 ( $udir ) = $p_dir =~ m|$untaint_pat|;
81793b90
GS
705 unless (defined $udir) {
706 if ($untaint_skip == 0) {
707 die "directory $p_dir is still tainted";
708 }
709 else {
710 return;
711 }
237437d0 712 }
a0d0e21e 713 }
81793b90 714 unless (chdir $udir) {
cd68ec93 715 warnings::warnif "Can't cd to $udir: $!\n";
81793b90
GS
716 return;
717 }
718 }
7e47e6ff
JH
719
720 # push the starting directory
57e73c4b 721 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
81793b90 722
7e47e6ff
JH
723 if ($Is_MacOS) {
724 $p_dir = $dir_pref; # ensure trailing ':'
725 }
726
81793b90
GS
727 while (defined $SE) {
728 unless ($bydepth) {
7e47e6ff
JH
729 $dir= $p_dir; # $File::Find::dir
730 $name= $dir_name; # $File::Find::name
731 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
81793b90 732 # prune may happen here
7e47e6ff
JH
733 $prune= 0;
734 { &$wanted_callback }; # protect against wild "next"
735 next if $prune;
81793b90 736 }
7e47e6ff 737
81793b90 738 # change to that directory
7e47e6ff 739 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
81793b90 740 my $udir= $dir_rel;
7e47e6ff
JH
741 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
742 ( $udir ) = $dir_rel =~ m|$untaint_pat|;
81793b90
GS
743 unless (defined $udir) {
744 if ($untaint_skip == 0) {
7e47e6ff
JH
745 if ($Is_MacOS) {
746 die "directory ($p_dir) $dir_rel is still tainted";
747 }
748 else {
749 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
750 }
751 } else { # $untaint_skip == 1
752 next;
81793b90
GS
753 }
754 }
755 }
756 unless (chdir $udir) {
7e47e6ff 757 if ($Is_MacOS) {
cd68ec93 758 warnings::warnif "Can't cd to ($p_dir) $udir: $!\n";
7e47e6ff
JH
759 }
760 else {
cd68ec93
RGS
761 warnings::warnif "Can't cd to (" .
762 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
7e47e6ff 763 }
81793b90
GS
764 next;
765 }
766 $CdLvl++;
767 }
768
7e47e6ff
JH
769 if ($Is_MacOS) {
770 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
771 }
772
773 $dir= $dir_name; # $File::Find::dir
81793b90
GS
774
775 # Get the list of files in the current directory.
7e47e6ff 776 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
cd68ec93 777 warnings::warnif "Can't opendir($dir_name): $!\n";
81793b90
GS
778 next;
779 }
780 @filenames = readdir DIR;
781 closedir(DIR);
719c805e
JS
782 @filenames = &$pre_process(@filenames) if $pre_process;
783 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
81793b90 784
5fa2bf2b
DD
785 # default: use whatever was specifid
786 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
787 $no_nlink = $avoid_nlink;
788 # if dir has wrong nlink count, force switch to slower stat method
789 $no_nlink = 1 if ($nlink < 2);
790
791 if ($nlink == 2 && !$no_nlink) {
81793b90
GS
792 # This dir has no subdirectories.
793 for my $FN (@filenames) {
7e47e6ff 794 next if $FN =~ $File::Find::skip_pattern;
81793b90 795
7e47e6ff
JH
796 $name = $dir_pref . $FN; # $File::Find::name
797 $_ = ($no_chdir ? $name : $FN); # $_
73396e07 798 { &$wanted_callback }; # protect against wild "next"
81793b90
GS
799 }
800
801 }
802 else {
803 # This dir has subdirectories.
804 $subcount = $nlink - 2;
805
806 for my $FN (@filenames) {
7e47e6ff 807 next if $FN =~ $File::Find::skip_pattern;
5fa2bf2b 808 if ($subcount > 0 || $no_nlink) {
81793b90
GS
809 # Seen all the subdirs?
810 # check for directoriness.
811 # stat is faster for a file in the current directory
07867069 812 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
81793b90
GS
813
814 if (-d _) {
815 --$subcount;
c7b9dd21 816 $FN =~ s/\.dir\z// if $Is_VMS;
81793b90
GS
817 push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
818 }
819 else {
7e47e6ff
JH
820 $name = $dir_pref . $FN; # $File::Find::name
821 $_= ($no_chdir ? $name : $FN); # $_
73396e07 822 { &$wanted_callback }; # protect against wild "next"
81793b90
GS
823 }
824 }
07867069 825 else {
7e47e6ff
JH
826 $name = $dir_pref . $FN; # $File::Find::name
827 $_= ($no_chdir ? $name : $FN); # $_
73396e07 828 { &$wanted_callback }; # protect against wild "next"
81793b90
GS
829 }
830 }
831 }
17b275ff
RA
832 }
833 continue {
57e73c4b 834 while ( defined ($SE = pop @Stack) ) {
81793b90
GS
835 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
836 if ($CdLvl > $Level && !$no_chdir) {
7e47e6ff
JH
837 my $tmp;
838 if ($Is_MacOS) {
839 $tmp = (':' x ($CdLvl-$Level)) . ':';
840 }
841 else {
842 $tmp = join('/',('..') x ($CdLvl-$Level));
843 }
844 die "Can't cd to $dir_name" . $tmp
845 unless chdir ($tmp);
81793b90
GS
846 $CdLvl = $Level;
847 }
7e47e6ff
JH
848
849 if ($Is_MacOS) {
850 # $pdir always has a trailing ':', except for the starting dir,
851 # where $dir_rel eq ':'
852 $dir_name = "$p_dir$dir_rel";
853 $dir_pref = "$dir_name:";
854 }
855 else {
856 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
857 $dir_pref = "$dir_name/";
858 }
859
719c805e 860 if ( $nlink == -2 ) {
7e47e6ff 861 $name = $dir = $p_dir; # $File::Find::name / dir
39e79f6b 862 $_ = $File::Find::current_dir;
719c805e 863 &$post_process; # End-of-directory processing
7e47e6ff
JH
864 }
865 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
866 $name = $dir_name;
867 if ($Is_MacOS) {
868 if ($dir_rel eq ':') { # must be the top dir, where we started
869 $name =~ s|:$||; # $File::Find::name
870 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
871 }
872 $dir = $p_dir; # $File::Find::dir
873 $_ = ($no_chdir ? $name : $dir_rel); # $_
874 }
875 else {
876 if ( substr($name,-2) eq '/.' ) {
5cf0a2f2 877 substr($name, length($name) == 2 ? -1 : -2) = '';
7e47e6ff
JH
878 }
879 $dir = $p_dir;
880 $_ = ($no_chdir ? $dir_name : $dir_rel );
881 if ( substr($_,-2) eq '/.' ) {
5cf0a2f2 882 substr($_, length($_) == 2 ? -1 : -2) = '';
7e47e6ff
JH
883 }
884 }
885 { &$wanted_callback }; # protect against wild "next"
886 }
887 else {
888 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
889 last;
890 }
81793b90 891 }
a0d0e21e
LW
892 }
893}
894
81793b90
GS
895
896# API:
897# $wanted
898# $dir_loc : absolute location of a dir
899# $p_dir : "parent directory"
900# preconditions:
901# chdir (if not no_chdir) to dir
902
903sub _find_dir_symlnk($$$) {
7e47e6ff 904 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
81793b90
GS
905 my @Stack;
906 my @filenames;
907 my $new_loc;
7e47e6ff 908 my $updir_loc = $dir_loc; # untainted parent directory
81793b90
GS
909 my $SE = [];
910 my $dir_name = $p_dir;
7e47e6ff
JH
911 my $dir_pref;
912 my $loc_pref;
39e79f6b 913 my $dir_rel = $File::Find::current_dir;
7e47e6ff
JH
914 my $byd_flag; # flag for pending stack entry if $bydepth
915 my $tainted = 0;
916 my $ok = 1;
917
918 if ($Is_MacOS) {
919 $dir_pref = ($p_dir =~ /:$/) ? "$p_dir" : "$p_dir:";
920 $loc_pref = ($dir_loc =~ /:$/) ? "$dir_loc" : "$dir_loc:";
7e47e6ff
JH
921 } else {
922 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
923 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
7e47e6ff 924 }
81793b90
GS
925
926 local ($dir, $name, $fullname, $prune, *DIR);
7e47e6ff
JH
927
928 unless ($no_chdir) {
929 # untaint the topdir
930 if (( $untaint ) && (is_tainted($dir_loc) )) {
931 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
932 # once untainted, $updir_loc is pushed on the stack (as parent directory);
933 # hence, we don't need to untaint the parent directory every time we chdir
934 # to it later
935 unless (defined $updir_loc) {
81793b90
GS
936 if ($untaint_skip == 0) {
937 die "directory $dir_loc is still tainted";
938 }
939 else {
940 return;
941 }
942 }
943 }
7e47e6ff
JH
944 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
945 unless ($ok) {
cd68ec93 946 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90
GS
947 return;
948 }
949 }
950
7e47e6ff
JH
951 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;
952
953 if ($Is_MacOS) {
954 $p_dir = $dir_pref; # ensure trailing ':'
955 }
57e73c4b 956
81793b90
GS
957 while (defined $SE) {
958
959 unless ($bydepth) {
7e47e6ff 960 # change (back) to parent directory (always untainted)
704ea872 961 unless ($no_chdir) {
7e47e6ff 962 unless (chdir $updir_loc) {
cd68ec93 963 warnings::warnif "Can't cd to $updir_loc: $!\n";
704ea872
GS
964 next;
965 }
966 }
7e47e6ff
JH
967 $dir= $p_dir; # $File::Find::dir
968 $name= $dir_name; # $File::Find::name
969 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
970 $fullname= $dir_loc; # $File::Find::fullname
81793b90 971 # prune may happen here
7e47e6ff 972 $prune= 0;
704ea872 973 lstat($_); # make sure file tests with '_' work
7e47e6ff
JH
974 { &$wanted_callback }; # protect against wild "next"
975 next if $prune;
81793b90
GS
976 }
977
978 # change to that directory
7e47e6ff
JH
979 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
980 $updir_loc = $dir_loc;
981 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
982 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
983 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
984 unless (defined $updir_loc) {
81793b90
GS
985 if ($untaint_skip == 0) {
986 die "directory $dir_loc is still tainted";
a0d0e21e 987 }
237437d0 988 else {
81793b90 989 next;
237437d0 990 }
a0d0e21e
LW
991 }
992 }
7e47e6ff 993 unless (chdir $updir_loc) {
cd68ec93 994 warnings::warnif "Can't cd to $updir_loc: $!\n";
81793b90
GS
995 next;
996 }
997 }
998
7e47e6ff
JH
999 if ($Is_MacOS) {
1000 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
1001 }
1002
1003 $dir = $dir_name; # $File::Find::dir
81793b90
GS
1004
1005 # Get the list of files in the current directory.
7e47e6ff 1006 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
cd68ec93 1007 warnings::warnif "Can't opendir($dir_loc): $!\n";
81793b90
GS
1008 next;
1009 }
1010 @filenames = readdir DIR;
1011 closedir(DIR);
1012
1013 for my $FN (@filenames) {
7e47e6ff 1014 next if $FN =~ $File::Find::skip_pattern;
81793b90
GS
1015
1016 # follow symbolic links / do an lstat
07867069 1017 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90
GS
1018
1019 # ignore if invalid symlink
1020 next unless defined $new_loc;
7e47e6ff 1021
81793b90 1022 if (-d _) {
7e47e6ff 1023 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
81793b90
GS
1024 }
1025 else {
7e47e6ff
JH
1026 $fullname = $new_loc; # $File::Find::fullname
1027 $name = $dir_pref . $FN; # $File::Find::name
1028 $_ = ($no_chdir ? $name : $FN); # $_
73396e07 1029 { &$wanted_callback }; # protect against wild "next"
81793b90
GS
1030 }
1031 }
1032
81793b90
GS
1033 }
1034 continue {
57e73c4b 1035 while (defined($SE = pop @Stack)) {
7e47e6ff
JH
1036 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
1037 if ($Is_MacOS) {
1038 # $p_dir always has a trailing ':', except for the starting dir,
1039 # where $dir_rel eq ':'
1040 $dir_name = "$p_dir$dir_rel";
1041 $dir_pref = "$dir_name:";
1042 $loc_pref = ($dir_loc =~ /:$/) ? $dir_loc : "$dir_loc:";
1043 }
1044 else {
1045 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
1046 $dir_pref = "$dir_name/";
1047 $loc_pref = "$dir_loc/";
1048 }
1049 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
1050 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
1051 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
cd68ec93 1052 warnings::warnif "Can't cd to $updir_loc: $!\n";
7e47e6ff
JH
1053 next;
1054 }
1055 }
1056 $fullname = $dir_loc; # $File::Find::fullname
1057 $name = $dir_name; # $File::Find::name
1058 if ($Is_MacOS) {
1059 if ($dir_rel eq ':') { # must be the top dir, where we started
1060 $name =~ s|:$||; # $File::Find::name
1061 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
1062 }
1063 $dir = $p_dir; # $File::Find::dir
1064 $_ = ($no_chdir ? $name : $dir_rel); # $_
1065 }
1066 else {
1067 if ( substr($name,-2) eq '/.' ) {
f801979b 1068 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name
7e47e6ff
JH
1069 }
1070 $dir = $p_dir; # $File::Find::dir
1071 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
1072 if ( substr($_,-2) eq '/.' ) {
f801979b 1073 substr($_, length($_) == 2 ? -1 : -2) = '';
7e47e6ff
JH
1074 }
1075 }
1076
1077 lstat($_); # make sure file tests with '_' work
1078 { &$wanted_callback }; # protect against wild "next"
1079 }
1080 else {
1081 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
1082 last;
1083 }
a0d0e21e
LW
1084 }
1085 }
1086}
1087
81793b90 1088
20408e3c 1089sub wrap_wanted {
81793b90
GS
1090 my $wanted = shift;
1091 if ( ref($wanted) eq 'HASH' ) {
1092 if ( $wanted->{follow} || $wanted->{follow_fast}) {
1093 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
1094 }
1095 if ( $wanted->{untaint} ) {
7e47e6ff 1096 $wanted->{untaint_pattern} = $File::Find::untaint_pattern
81793b90
GS
1097 unless defined $wanted->{untaint_pattern};
1098 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
1099 }
1100 return $wanted;
1101 }
1102 else {
1103 return { wanted => $wanted };
1104 }
a0d0e21e
LW
1105}
1106
20408e3c 1107sub find {
81793b90
GS
1108 my $wanted = shift;
1109 _find_opt(wrap_wanted($wanted), @_);
a0d0e21e
LW
1110}
1111
55d729e4 1112sub finddepth {
81793b90
GS
1113 my $wanted = wrap_wanted(shift);
1114 $wanted->{bydepth} = 1;
1115 _find_opt($wanted, @_);
20408e3c 1116}
6280b799 1117
7e47e6ff
JH
1118# default
1119$File::Find::skip_pattern = qr/^\.{1,2}\z/;
1120$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
1121
6280b799 1122# These are hard-coded for now, but may move to hint files.
10eba763 1123if ($^O eq 'VMS') {
81793b90 1124 $Is_VMS = 1;
7e47e6ff
JH
1125 $File::Find::dont_use_nlink = 1;
1126}
1127elsif ($^O eq 'MacOS') {
1128 $Is_MacOS = 1;
1129 $File::Find::dont_use_nlink = 1;
1130 $File::Find::skip_pattern = qr/^Icon\015\z/;
1131 $File::Find::untaint_pattern = qr|^(.+)$|;
748a9306
LW
1132}
1133
7e47e6ff
JH
1134# this _should_ work properly on all platforms
1135# where File::Find can be expected to work
1136$File::Find::current_dir = File::Spec->curdir || '.';
1137
81793b90 1138$File::Find::dont_use_nlink = 1
497711e7 1139 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' ||
2986a63f 1140 $^O eq 'cygwin' || $^O eq 'epoc' || $^O eq 'NetWare';
6280b799 1141
20408e3c
GS
1142# Set dont_use_nlink in your hint file if your system's stat doesn't
1143# report the number of links in a directory as an indication
1144# of the number of files.
1145# See, e.g. hints/machten.sh for MachTen 2.2.
81793b90
GS
1146unless ($File::Find::dont_use_nlink) {
1147 require Config;
1148 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
20408e3c
GS
1149}
1150
7e47e6ff
JH
1151# We need a function that checks if a scalar is tainted. Either use the
1152# Scalar::Util module's tainted() function or our (slower) pure Perl
1153# fallback is_tainted_pp()
1154{
1155 local $@;
1156 eval { require Scalar::Util };
1157 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
1158}
1159
a0d0e21e 11601;