This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
fix various C-backend shenanigans
[perl5.git] / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
07867069 2require 5.005;
a0d0e21e 3require Exporter;
6280b799 4require Cwd;
a0d0e21e 5
f06db76b
AD
6=head1 NAME
7
8find - traverse a file tree
9
10finddepth - traverse a directory structure depth-first
11
12=head1 SYNOPSIS
13
14 use File::Find;
81793b90 15 find(\&wanted, '/foo', '/bar');
f06db76b 16 sub wanted { ... }
237437d0 17
f06db76b 18 use File::Find;
81793b90 19 finddepth(\&wanted, '/foo', '/bar');
f06db76b 20 sub wanted { ... }
81793b90
GS
21
22 use File::Find;
23 find({ wanted => \&process, follow => 1 }, '.');
f06db76b
AD
24
25=head1 DESCRIPTION
26
20408e3c 27The first argument to find() is either a hash reference describing the
81793b90 28operations to be performed for each file, or a code reference.
20408e3c 29
81793b90
GS
30Here are the possible keys for the hash:
31
32=over 3
33
34=item C<wanted>
35
36The value should be a code reference. This code reference is called
37I<the wanted() function> below.
38
39=item C<bydepth>
40
41Reports the name of a directory only AFTER all its entries
42have been reported. Entry point finddepth() is a shortcut for
43specifying C<{ bydepth => 1 }> in the first argument of find().
44
45=item C<follow>
46
47Causes symbolic links to be followed. Since directory trees with symbolic
48links (followed) may contain files more than once and may even have
49cycles, a hash has to be built up with an entry for each file.
50This might be expensive both in space and time for a large
51directory tree. See I<follow_fast> and I<follow_skip> below.
52If either I<follow> or I<follow_fast> is in effect:
53
54=over 6
55
56=item
57
58It is guarantueed that an I<lstat> has been called before the user's
59I<wanted()> function is called. This enables fast file checks involving S< _>.
60
61=item
62
63There is a variable C<$File::Find::fullname> which holds the absolute
64pathname of the file with all symbolic links resolved
65
66=back
67
68=item C<follow_fast>
69
70This is similar to I<follow> except that it may report some files
71more than once. It does detect cycles however.
72Since only symbolic links have to be hashed, this is
73much cheaper both in space and time.
74If processing a file more than once (by the user's I<wanted()> function)
75is worse than just taking time, the option I<follow> should be used.
76
77=item C<follow_skip>
78
79C<follow_skip==1>, which is the default, causes all files which are
80neither directories nor symbolic links to be ignored if they are about
81to be processed a second time. If a directory or a symbolic link
82are about to be processed a second time, File::Find dies.
83C<follow_skip==0> causes File::Find to die if any file is about to be
84processed a second time.
85C<follow_skip==2> causes File::Find to ignore any duplicate files and
86dirctories but to proceed normally otherwise.
20408e3c 87
f06db76b 88
81793b90
GS
89=item C<no_chdir>
90
91Does not C<chdir()> to each directory as it recurses. The wanted()
92function will need to be aware of this, of course. In this case,
93C<$_> will be the same as C<$File::Find::name>.
94
95=item C<untaint>
96
97If find is used in taint-mode (-T command line switch or if EUID != UID
98or if EGID != GID) then internally directory names have to be untainted
99before they can be cd'ed to. Therefore they are checked against a regular
100expression I<untaint_pattern>. Note, that all names passed to the
101user's I<wanted()> function are still tainted.
102
103=item C<untaint_pattern>
104
105See above. This should be set using the C<qr> quoting operator.
106The default is set to C<qr|^([-+@\w./]+)$|>.
107Note that the paranthesis which are vital.
108
109=item C<untaint_skip>
110
111If set, directories (subtrees) which fail the I<untaint_pattern>
112are skipped. The default is to 'die' in such a case.
113
114=back
115
116The wanted() function does whatever verifications you want.
117C<$File::Find::dir> contains the current directory name, and C<$_> the
118current filename within that directory. C<$File::Find::name> contains
119the complete pathname to the file. You are chdir()'d to C<$File::Find::dir> when
120the function is called, unless C<no_chdir> was specified.
121When <follow> or <follow_fast> are in effect there is also a
122C<$File::Find::fullname>.
123The function may set C<$File::Find::prune> to prune the tree
124unless C<bydepth> was specified.
47a735e8 125
20408e3c 126This library is useful for the C<find2perl> tool, which when fed,
f06db76b
AD
127
128 find2perl / -name .nfs\* -mtime +7 \
81793b90 129 -exec rm -f {} \; -o -fstype nfs -prune
f06db76b
AD
130
131produces something like:
132
133 sub wanted {
134 /^\.nfs.*$/ &&
81793b90 135 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
f06db76b
AD
136 int(-M _) > 7 &&
137 unlink($_)
138 ||
81793b90 139 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
f06db76b 140 $dev < 0 &&
6280b799 141 ($File::Find::prune = 1);
f06db76b
AD
142 }
143
81793b90 144Set the variable C<$File::Find::dont_use_nlink> if you're using AFS,
6280b799 145since AFS cheats.
f06db76b 146
f06db76b
AD
147
148Here's another interesting wanted function. It will find all symlinks
149that don't resolve:
150
151 sub wanted {
81793b90 152 -l && !-e && print "bogus link: $File::Find::name\n";
237437d0 153 }
f06db76b 154
81793b90
GS
155See also the script C<pfind> on CPAN for a nice application of this
156module.
157
158=head1 CAVEAT
159
160Be aware that the option to follow symblic links can be dangerous.
161Depending on the structure of the directory tree (including symbolic
162links to directories) you might traverse a given (physical) directory
163more than once (only if C<follow_fast> is in effect).
164Furthermore, deleting or changing files in a symbolically linked directory
165might cause very unpleasant surprises, since you delete or change files
166in an unknown directory.
0530a6c4 167
0530a6c4 168
f06db76b
AD
169=cut
170
a0d0e21e 171@ISA = qw(Exporter);
6280b799
PP
172@EXPORT = qw(find finddepth);
173
a0d0e21e 174
81793b90
GS
175use strict;
176my $Is_VMS;
177
178require File::Basename;
179
180my %SLnkSeen;
181my ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
182 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat);
183
184sub contract_name {
185 my ($cdir,$fn) = @_;
186
187 return substr($cdir,0,rindex($cdir,'/')) if $fn eq '.';
188
189 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
190
191 $fn =~ s|^\./||;
192
193 my $abs_name= $cdir . $fn;
194
195 if (substr($fn,0,3) eq '../') {
196 do 1 while ($abs_name=~ s|/(?>[^/]+)/\.\./|/|);
197 }
198
199 return $abs_name;
200}
201
202
203sub PathCombine($$) {
204 my ($Base,$Name) = @_;
205 my $AbsName;
206
207 if (substr($Name,0,1) eq '/') {
208 $AbsName= $Name;
209 }
210 else {
211 $AbsName= contract_name($Base,$Name);
212 }
213
214 # (simple) check for recursion
215 my $newlen= length($AbsName);
216 if ($newlen <= length($Base)) {
217 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
218 && $AbsName eq substr($Base,0,$newlen))
219 {
220 return undef;
221 }
222 }
223 return $AbsName;
224}
225
226sub Follow_SymLink($) {
227 my ($AbsName) = @_;
228
229 my ($NewName,$DEV, $INO);
230 ($DEV, $INO)= lstat $AbsName;
231
232 while (-l _) {
233 if ($SLnkSeen{$DEV, $INO}++) {
234 if ($follow_skip < 2) {
235 die "$AbsName is encountered a second time";
a0d0e21e
LW
236 }
237 else {
81793b90 238 return undef;
a0d0e21e
LW
239 }
240 }
81793b90
GS
241 $NewName= PathCombine($AbsName, readlink($AbsName));
242 unless(defined $NewName) {
243 if ($follow_skip < 2) {
244 die "$AbsName is a recursive symbolic link";
245 }
246 else {
247 return undef;
a0d0e21e 248 }
81793b90
GS
249 }
250 else {
251 $AbsName= $NewName;
252 }
253 ($DEV, $INO) = lstat($AbsName);
254 return undef unless defined $DEV; # dangling symbolic link
255 }
256
257 if ($full_check && $SLnkSeen{$DEV, $INO}++) {
258 if ($follow_skip < 1) {
259 die "$AbsName encountered a second time";
260 }
261 else {
262 return undef;
263 }
264 }
265
266 return $AbsName;
267}
268
269use vars qw/ $dir $name $fullname $prune /;
270sub _find_dir_symlnk($$$);
271sub _find_dir($$$);
272
273sub _find_opt {
274 my $wanted = shift;
275 die "invalid top directory" unless defined $_[0];
276
277 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::cwd();
278 my $cwd_untainted = $cwd;
279 $wanted_callback = $wanted->{wanted};
280 $bydepth = $wanted->{bydepth};
281 $no_chdir = $wanted->{no_chdir};
282 $full_check = $wanted->{follow};
283 $follow = $full_check || $wanted->{follow_fast};
284 $follow_skip = $wanted->{follow_skip};
285 $untaint = $wanted->{untaint};
286 $untaint_pat = $wanted->{untaint_pattern};
287 $untaint_skip = $wanted->{untaint_skip};
288
289
290 # a symbolic link to a directory doesn't increase the link count
291 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
292
293 if ( $untaint ) {
294 $cwd_untainted= $1 if $cwd_untainted =~ m|$untaint_pat|;
295 die "insecure cwd in find(depth)" unless defined($cwd_untainted);
296 }
297
298 my ($abs_dir, $nlink, $Is_Dir);
299
300 Proc_Top_Item:
301 foreach my $TOP (@_) {
302 my $top_item = $TOP;
07867069 303 $top_item =~ s|/$|| unless $top_item eq '/';
81793b90
GS
304 $Is_Dir= 0;
305
306 if ($follow) {
307 if (substr($top_item,0,1) eq '/') {
308 $abs_dir = $top_item;
309 }
310 elsif ($top_item eq '.') {
311 $abs_dir = $cwd;
237437d0 312 }
81793b90
GS
313 else { # care about any ../
314 $abs_dir = contract_name("$cwd/",$top_item);
315 }
316 $abs_dir= Follow_SymLink($abs_dir);
317 unless (defined $abs_dir) {
318 warn "$top_item is a dangling symbolic link\n";
319 next Proc_Top_Item;
320 }
321 if (-d _) {
322 _find_dir_symlnk($wanted, $abs_dir, $top_item);
323 $Is_Dir= 1;
324 }
325 }
326 else { # no follow
327 $nlink = (lstat $top_item)[3];
328 unless (defined $nlink) {
329 warn "Can't stat $top_item: $!\n";
330 next Proc_Top_Item;
331 }
332 if (-d _) {
333 $top_item =~ s/\.dir$// if $Is_VMS;
334 _find_dir($wanted, $top_item, $nlink);
335 $Is_Dir= 1;
336 }
237437d0 337 else {
81793b90
GS
338 $abs_dir= $top_item;
339 }
340 }
341
342 unless ($Is_Dir) {
343 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
344 ($dir,$_) = ('.', $top_item);
345 }
346
347 $abs_dir = $dir;
348 if ($untaint) {
349 my $abs_dir_save = $abs_dir;
350 $abs_dir = $1 if $abs_dir =~ m|$untaint_pat|;
351 unless (defined $abs_dir) {
352 if ($untaint_skip == 0) {
353 die "directory $abs_dir_save is still tainted";
354 }
355 else {
356 next Proc_Top_Item;
357 }
358 }
359 }
360
361 unless ($no_chdir or chdir $abs_dir) {
362 warn "Couldn't chdir $abs_dir: $!\n";
363 next Proc_Top_Item;
364 }
365
366 $name = $abs_dir;
367
368 &$wanted_callback;
369
370 }
371
372 $no_chdir or chdir $cwd_untainted;
373 }
374}
375
376# API:
377# $wanted
378# $p_dir : "parent directory"
379# $nlink : what came back from the stat
380# preconditions:
381# chdir (if not no_chdir) to dir
382
383sub _find_dir($$$) {
384 my ($wanted, $p_dir, $nlink) = @_;
385 my ($CdLvl,$Level) = (0,0);
386 my @Stack;
387 my @filenames;
388 my ($subcount,$sub_nlink);
389 my $SE= [];
390 my $dir_name= $p_dir;
07867069 391 my $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
81793b90
GS
392 my $dir_rel= '.'; # directory name relative to current directory
393
394 local ($dir, $name, $prune, *DIR);
395
396 unless ($no_chdir or $p_dir eq '.') {
397 my $udir = $p_dir;
398 if ($untaint) {
399 $udir = $1 if $p_dir =~ m|$untaint_pat|;
400 unless (defined $udir) {
401 if ($untaint_skip == 0) {
402 die "directory $p_dir is still tainted";
403 }
404 else {
405 return;
406 }
237437d0 407 }
a0d0e21e 408 }
81793b90
GS
409 unless (chdir $udir) {
410 warn "Can't cd to $udir: $!\n";
411 return;
412 }
413 }
414
415 while (defined $SE) {
416 unless ($bydepth) {
417 $dir= $p_dir;
418 $name= $dir_name;
419 $_= ($no_chdir ? $dir_name : $dir_rel );
420 # prune may happen here
421 $prune= 0;
422 &$wanted_callback;
423 next if $prune;
424 }
425
426 # change to that directory
427 unless ($no_chdir or $dir_rel eq '.') {
428 my $udir= $dir_rel;
429 if ($untaint) {
430 $udir = $1 if $dir_rel =~ m|$untaint_pat|;
431 unless (defined $udir) {
432 if ($untaint_skip == 0) {
07867069
HJ
433 die "directory ("
434 . ($p_dir ne '/' ? $p_dir : '')
435 . "/) $dir_rel is still tainted";
81793b90
GS
436 }
437 }
438 }
439 unless (chdir $udir) {
07867069
HJ
440 warn "Can't cd to ("
441 . ($p_dir ne '/' ? $p_dir : '')
442 . "/) $udir : $!\n";
81793b90
GS
443 next;
444 }
445 $CdLvl++;
446 }
447
448 $dir= $dir_name;
449
450 # Get the list of files in the current directory.
451 unless (opendir DIR, ($no_chdir ? $dir_name : '.')) {
452 warn "Can't opendir($dir_name): $!\n";
453 next;
454 }
455 @filenames = readdir DIR;
456 closedir(DIR);
457
458 if ($nlink == 2 && !$avoid_nlink) {
459 # This dir has no subdirectories.
460 for my $FN (@filenames) {
461 next if $FN =~ /^\.{1,2}$/;
462
07867069 463 $name = $dir_pref . $FN;
81793b90
GS
464 $_ = ($no_chdir ? $name : $FN);
465 &$wanted_callback;
466 }
467
468 }
469 else {
470 # This dir has subdirectories.
471 $subcount = $nlink - 2;
472
473 for my $FN (@filenames) {
474 next if $FN =~ /^\.{1,2}$/;
475 if ($subcount > 0 || $avoid_nlink) {
476 # Seen all the subdirs?
477 # check for directoriness.
478 # stat is faster for a file in the current directory
07867069 479 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
81793b90
GS
480
481 if (-d _) {
482 --$subcount;
483 $FN =~ s/\.dir$// if $Is_VMS;
484 push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
485 }
486 else {
07867069 487 $name = $dir_pref . $FN;
81793b90
GS
488 $_= ($no_chdir ? $name : $FN);
489 &$wanted_callback;
490 }
491 }
07867069
HJ
492 else {
493 $name = $dir_pref . $FN;
81793b90
GS
494 $_= ($no_chdir ? $name : $FN);
495 &$wanted_callback;
496 }
497 }
498 }
499 if ($bydepth) {
500 $name = $dir_name;
501 $dir = $p_dir;
502 $_ = ($no_chdir ? $dir_name : $dir_rel );
503 &$wanted_callback;
504 }
17b275ff
RA
505 }
506 continue {
81793b90
GS
507 if ( defined ($SE = pop @Stack) ) {
508 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
509 if ($CdLvl > $Level && !$no_chdir) {
510 die "Can't cd to $dir_name" . '../' x ($CdLvl-$Level)
511 unless chdir '../' x ($CdLvl-$Level);
512 $CdLvl = $Level;
513 }
07867069
HJ
514 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
515 $dir_pref = "$dir_name/";
81793b90 516 }
a0d0e21e
LW
517 }
518}
519
81793b90
GS
520
521# API:
522# $wanted
523# $dir_loc : absolute location of a dir
524# $p_dir : "parent directory"
525# preconditions:
526# chdir (if not no_chdir) to dir
527
528sub _find_dir_symlnk($$$) {
529 my ($wanted, $dir_loc, $p_dir) = @_;
530 my @Stack;
531 my @filenames;
532 my $new_loc;
533 my $SE = [];
534 my $dir_name = $p_dir;
07867069
HJ
535 my $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
536 my $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
81793b90
GS
537 my $dir_rel = '.'; # directory name relative to current directory
538
539 local ($dir, $name, $fullname, $prune, *DIR);
540
541 unless ($no_chdir or $p_dir eq '.') {
542 my $udir = $dir_loc;
543 if ($untaint) {
544 $udir = $1 if $dir_loc =~ m|$untaint_pat|;
545 unless (defined $udir) {
546 if ($untaint_skip == 0) {
547 die "directory $dir_loc is still tainted";
548 }
549 else {
550 return;
551 }
552 }
553 }
554 unless (chdir $udir) {
555 warn "Can't cd to $udir: $!\n";
556 return;
557 }
558 }
559
560 while (defined $SE) {
561
562 unless ($bydepth) {
563 $dir= $p_dir;
564 $name= $dir_name;
565 $_= ($no_chdir ? $dir_name : $dir_rel );
566 $fullname= $dir_loc;
567 # prune may happen here
568 $prune= 0;
569 &$wanted_callback;
570 next if $prune;
571 }
572
573 # change to that directory
574 unless ($no_chdir or $dir_rel eq '.') {
575 my $udir = $dir_loc;
576 if ($untaint) {
577 $udir = $1 if $dir_loc =~ m|$untaint_pat|;
578 unless (defined $udir ) {
579 if ($untaint_skip == 0) {
580 die "directory $dir_loc is still tainted";
a0d0e21e 581 }
237437d0 582 else {
81793b90 583 next;
237437d0 584 }
a0d0e21e
LW
585 }
586 }
81793b90
GS
587 unless (chdir $udir) {
588 warn "Can't cd to $udir: $!\n";
589 next;
590 }
591 }
592
593 $dir = $dir_name;
594
595 # Get the list of files in the current directory.
596 unless (opendir DIR, ($no_chdir ? $dir_loc : '.')) {
597 warn "Can't opendir($dir_loc): $!\n";
598 next;
599 }
600 @filenames = readdir DIR;
601 closedir(DIR);
602
603 for my $FN (@filenames) {
604 next if $FN =~ /^\.{1,2}$/;
605
606 # follow symbolic links / do an lstat
07867069 607 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90
GS
608
609 # ignore if invalid symlink
610 next unless defined $new_loc;
611
612 if (-d _) {
613 push @Stack,[$new_loc,$dir_name,$FN];
614 }
615 else {
616 $fullname = $new_loc;
07867069 617 $name = $dir_pref . $FN;
81793b90
GS
618 $_ = ($no_chdir ? $name : $FN);
619 &$wanted_callback;
620 }
621 }
622
623 if ($bydepth) {
624 $fullname = $dir_loc;
625 $name = $dir_name;
626 $_ = ($no_chdir ? $dir_name : $dir_rel);
627 &$wanted_callback;
628 }
629 }
630 continue {
631 if (defined($SE = pop @Stack)) {
632 ($dir_loc, $p_dir, $dir_rel) = @$SE;
07867069
HJ
633 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
634 $dir_pref = "$dir_name/";
635 $loc_pref = "$dir_loc/";
a0d0e21e
LW
636 }
637 }
638}
639
81793b90 640
20408e3c 641sub wrap_wanted {
81793b90
GS
642 my $wanted = shift;
643 if ( ref($wanted) eq 'HASH' ) {
644 if ( $wanted->{follow} || $wanted->{follow_fast}) {
645 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
646 }
647 if ( $wanted->{untaint} ) {
648 $wanted->{untaint_pattern} = qr|^([-+@\w./]+)$|
649 unless defined $wanted->{untaint_pattern};
650 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
651 }
652 return $wanted;
653 }
654 else {
655 return { wanted => $wanted };
656 }
a0d0e21e
LW
657}
658
20408e3c 659sub find {
81793b90
GS
660 my $wanted = shift;
661 _find_opt(wrap_wanted($wanted), @_);
662 %SLnkSeen= (); # free memory
a0d0e21e
LW
663}
664
55d729e4 665sub finddepth {
81793b90
GS
666 my $wanted = wrap_wanted(shift);
667 $wanted->{bydepth} = 1;
668 _find_opt($wanted, @_);
669 %SLnkSeen= (); # free memory
20408e3c 670}
6280b799
PP
671
672# These are hard-coded for now, but may move to hint files.
10eba763 673if ($^O eq 'VMS') {
81793b90
GS
674 $Is_VMS = 1;
675 $File::Find::dont_use_nlink = 1;
748a9306
LW
676}
677
81793b90 678$File::Find::dont_use_nlink = 1
3e8584ad 679 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32';
6280b799 680
20408e3c
GS
681# Set dont_use_nlink in your hint file if your system's stat doesn't
682# report the number of links in a directory as an indication
683# of the number of files.
684# See, e.g. hints/machten.sh for MachTen 2.2.
81793b90
GS
685unless ($File::Find::dont_use_nlink) {
686 require Config;
687 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
20408e3c
GS
688}
689
a0d0e21e 6901;