This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
speed up building with less disk IO pod moves+__END__+misc
[perl5.git] / ext / File-Find / lib / File / Find.pm
CommitLineData
a0d0e21e 1package File::Find;
3b825e41 2use 5.006;
b75c8c73 3use strict;
b395063c 4use warnings;
cd68ec93 5use warnings::register;
f4eedc6b 6our $VERSION = '1.29';
a0d0e21e 7require Exporter;
6280b799 8require Cwd;
a0d0e21e 9
f4eedc6b
DD
10our @ISA = qw(Exporter);
11our @EXPORT = qw(find finddepth);
3cb6de81 12
f06db76b 13
f4eedc6b
DD
14use strict;
15my $Is_VMS;
16my $Is_Win32;
f06db76b 17
f4eedc6b
DD
18require File::Basename;
19require File::Spec;
abfdd623 20
f4eedc6b
DD
21# Should ideally be my() not our() but local() currently
22# refuses to operate on lexicals
abfdd623 23
f4eedc6b
DD
24our %SLnkSeen;
25our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
26 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
27 $pre_process, $post_process, $dangling_symlinks);
abfdd623 28
f4eedc6b
DD
29sub contract_name {
30 my ($cdir,$fn) = @_;
abfdd623 31
f4eedc6b 32 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
abfdd623 33
f4eedc6b 34 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
abfdd623 35
f4eedc6b 36 $fn =~ s|^\./||;
abfdd623 37
f4eedc6b 38 my $abs_name= $cdir . $fn;
abfdd623 39
f4eedc6b
DD
40 if (substr($fn,0,3) eq '../') {
41 1 while $abs_name =~ s!/[^/]*/\.\./+!/!;
42 }
abfdd623 43
f4eedc6b
DD
44 return $abs_name;
45}
abfdd623 46
f4eedc6b
DD
47sub PathCombine($$) {
48 my ($Base,$Name) = @_;
49 my $AbsName;
20408e3c 50
f4eedc6b
DD
51 if (substr($Name,0,1) eq '/') {
52 $AbsName= $Name;
53 }
54 else {
55 $AbsName= contract_name($Base,$Name);
56 }
81793b90 57
f4eedc6b
DD
58 # (simple) check for recursion
59 my $newlen= length($AbsName);
60 if ($newlen <= length($Base)) {
61 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
62 && $AbsName eq substr($Base,0,$newlen))
63 {
64 return undef;
65 }
66 }
67 return $AbsName;
68}
81793b90 69
f4eedc6b
DD
70sub Follow_SymLink($) {
71 my ($AbsName) = @_;
81793b90 72
f4eedc6b
DD
73 my ($NewName,$DEV, $INO);
74 ($DEV, $INO)= lstat $AbsName;
81793b90 75
f4eedc6b
DD
76 while (-l _) {
77 if ($SLnkSeen{$DEV, $INO}++) {
78 if ($follow_skip < 2) {
79 die "$AbsName is encountered a second time";
80 }
81 else {
82 return undef;
83 }
84 }
85 $NewName= PathCombine($AbsName, readlink($AbsName));
86 unless(defined $NewName) {
87 if ($follow_skip < 2) {
88 die "$AbsName is a recursive symbolic link";
89 }
90 else {
91 return undef;
92 }
93 }
94 else {
95 $AbsName= $NewName;
96 }
97 ($DEV, $INO) = lstat($AbsName);
98 return undef unless defined $DEV; # dangling symbolic link
99 }
81793b90 100
f4eedc6b
DD
101 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
102 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
103 die "$AbsName encountered a second time";
104 }
105 else {
106 return undef;
107 }
108 }
81793b90 109
f4eedc6b
DD
110 return $AbsName;
111}
719c805e 112
f4eedc6b
DD
113our($dir, $name, $fullname, $prune);
114sub _find_dir_symlnk($$$);
115sub _find_dir($$$);
719c805e 116
f4eedc6b
DD
117# check whether or not a scalar variable is tainted
118# (code straight from the Camel, 3rd ed., page 561)
119sub is_tainted_pp {
120 my $arg = shift;
121 my $nada = substr($arg, 0, 0); # zero-length
122 local $@;
123 eval { eval "# $nada" };
124 return length($@) != 0;
125}
719c805e 126
f4eedc6b
DD
127sub _find_opt {
128 my $wanted = shift;
129 die "invalid top directory" unless defined $_[0];
719c805e 130
f4eedc6b
DD
131 # This function must local()ize everything because callbacks may
132 # call find() or finddepth()
81793b90 133
f4eedc6b
DD
134 local %SLnkSeen;
135 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
136 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
137 $pre_process, $post_process, $dangling_symlinks);
138 local($dir, $name, $fullname, $prune);
139 local *_ = \my $a;
81793b90 140
f4eedc6b
DD
141 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
142 if ($Is_VMS) {
143 # VMS returns this by default in VMS format which just doesn't
144 # work for the rest of this module.
145 $cwd = VMS::Filespec::unixpath($cwd);
81793b90 146
f4eedc6b
DD
147 # Apparently this is not expected to have a trailing space.
148 # To attempt to make VMS/UNIX conversions mostly reversible,
149 # a trailing slash is needed. The run-time functions ignore the
150 # resulting double slash, but it causes the perl tests to fail.
151 $cwd =~ s#/\z##;
81793b90 152
f4eedc6b
DD
153 # This comes up in upper case now, but should be lower.
154 # In the future this could be exact case, no need to change.
155 }
156 my $cwd_untainted = $cwd;
157 my $check_t_cwd = 1;
158 $wanted_callback = $wanted->{wanted};
159 $bydepth = $wanted->{bydepth};
160 $pre_process = $wanted->{preprocess};
161 $post_process = $wanted->{postprocess};
162 $no_chdir = $wanted->{no_chdir};
163 $full_check = $Is_Win32 ? 0 : $wanted->{follow};
164 $follow = $Is_Win32 ? 0 :
165 $full_check || $wanted->{follow_fast};
166 $follow_skip = $wanted->{follow_skip};
167 $untaint = $wanted->{untaint};
168 $untaint_pat = $wanted->{untaint_pattern};
169 $untaint_skip = $wanted->{untaint_skip};
170 $dangling_symlinks = $wanted->{dangling_symlinks};
81793b90 171
f4eedc6b
DD
172 # for compatibility reasons (find.pl, find2perl)
173 local our ($topdir, $topdev, $topino, $topmode, $topnlink);
81793b90 174
f4eedc6b
DD
175 # a symbolic link to a directory doesn't increase the link count
176 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
81793b90 177
f4eedc6b 178 my ($abs_dir, $Is_Dir);
81793b90 179
f4eedc6b
DD
180 Proc_Top_Item:
181 foreach my $TOP (@_) {
182 my $top_item = $TOP;
183 $top_item = VMS::Filespec::unixify($top_item) if $Is_VMS;
204b4d7f 184
f4eedc6b 185 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
81793b90 186
f4eedc6b
DD
187 if ($Is_Win32) {
188 $top_item =~ s|[/\\]\z||
189 unless $top_item =~ m{^(?:\w:)?[/\\]$};
190 }
191 else {
192 $top_item =~ s|/\z|| unless $top_item eq '/';
193 }
81793b90 194
f4eedc6b 195 $Is_Dir= 0;
204b4d7f 196
f4eedc6b 197 if ($follow) {
81793b90 198
f4eedc6b
DD
199 if (substr($top_item,0,1) eq '/') {
200 $abs_dir = $top_item;
201 }
202 elsif ($top_item eq $File::Find::current_dir) {
203 $abs_dir = $cwd;
204 }
205 else { # care about any ../
206 $top_item =~ s/\.dir\z//i if $Is_VMS;
207 $abs_dir = contract_name("$cwd/",$top_item);
208 }
209 $abs_dir= Follow_SymLink($abs_dir);
210 unless (defined $abs_dir) {
211 if ($dangling_symlinks) {
212 if (ref $dangling_symlinks eq 'CODE') {
213 $dangling_symlinks->($top_item, $cwd);
214 } else {
215 warnings::warnif "$top_item is a dangling symbolic link\n";
216 }
217 }
218 next Proc_Top_Item;
219 }
95e23d19 220
f4eedc6b
DD
221 if (-d _) {
222 $top_item =~ s/\.dir\z//i if $Is_VMS;
223 _find_dir_symlnk($wanted, $abs_dir, $top_item);
224 $Is_Dir= 1;
225 }
226 }
227 else { # no follow
228 $topdir = $top_item;
229 unless (defined $topnlink) {
230 warnings::warnif "Can't stat $top_item: $!\n";
231 next Proc_Top_Item;
232 }
233 if (-d _) {
234 $top_item =~ s/\.dir\z//i if $Is_VMS;
235 _find_dir($wanted, $top_item, $topnlink);
236 $Is_Dir= 1;
237 }
238 else {
239 $abs_dir= $top_item;
240 }
241 }
95e23d19 242
f4eedc6b
DD
243 unless ($Is_Dir) {
244 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
245 ($dir,$_) = ('./', $top_item);
246 }
20408e3c 247
f4eedc6b
DD
248 $abs_dir = $dir;
249 if (( $untaint ) && (is_tainted($dir) )) {
250 ( $abs_dir ) = $dir =~ m|$untaint_pat|;
251 unless (defined $abs_dir) {
252 if ($untaint_skip == 0) {
253 die "directory $dir is still tainted";
254 }
255 else {
256 next Proc_Top_Item;
257 }
258 }
259 }
80e52b73 260
f4eedc6b
DD
261 unless ($no_chdir || chdir $abs_dir) {
262 warnings::warnif "Couldn't chdir $abs_dir: $!\n";
263 next Proc_Top_Item;
264 }
f06db76b 265
f4eedc6b
DD
266 $name = $abs_dir . $_; # $File::Find::name
267 $_ = $name if $no_chdir;
81793b90 268
f4eedc6b 269 { $wanted_callback->() }; # protect against wild "next"
81793b90 270
f4eedc6b 271 }
81793b90 272
f4eedc6b
DD
273 unless ( $no_chdir ) {
274 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
275 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
276 unless (defined $cwd_untainted) {
277 die "insecure cwd in find(depth)";
278 }
279 $check_t_cwd = 0;
280 }
281 unless (chdir $cwd_untainted) {
282 die "Can't cd to $cwd: $!\n";
283 }
284 }
285 }
286}
81793b90 287
f4eedc6b
DD
288# API:
289# $wanted
290# $p_dir : "parent directory"
291# $nlink : what came back from the stat
292# preconditions:
293# chdir (if not no_chdir) to dir
81793b90 294
f4eedc6b
DD
295sub _find_dir($$$) {
296 my ($wanted, $p_dir, $nlink) = @_;
297 my ($CdLvl,$Level) = (0,0);
298 my @Stack;
299 my @filenames;
300 my ($subcount,$sub_nlink);
301 my $SE= [];
302 my $dir_name= $p_dir;
303 my $dir_pref;
304 my $dir_rel = $File::Find::current_dir;
305 my $tainted = 0;
306 my $no_nlink;
81793b90 307
f4eedc6b
DD
308 if ($Is_Win32) {
309 $dir_pref
310 = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$} ? $p_dir : "$p_dir/" );
311 } elsif ($Is_VMS) {
81793b90 312
f4eedc6b
DD
313 # VMS is returning trailing .dir on directories
314 # and trailing . on files and symbolic links
315 # in UNIX syntax.
316 #
81793b90 317
f4eedc6b 318 $p_dir =~ s/\.(dir)?$//i unless $p_dir eq '.';
81793b90 319
f4eedc6b
DD
320 $dir_pref = ($p_dir =~ m/[\]>]+$/ ? $p_dir : "$p_dir/" );
321 }
322 else {
323 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
324 }
abfdd623 325
f4eedc6b 326 local ($dir, $name, $prune, *DIR);
95e23d19 327
f4eedc6b
DD
328 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
329 my $udir = $p_dir;
330 if (( $untaint ) && (is_tainted($p_dir) )) {
331 ( $udir ) = $p_dir =~ m|$untaint_pat|;
332 unless (defined $udir) {
333 if ($untaint_skip == 0) {
334 die "directory $p_dir is still tainted";
335 }
336 else {
337 return;
338 }
339 }
340 }
341 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
342 warnings::warnif "Can't cd to $udir: $!\n";
343 return;
344 }
345 }
abfdd623 346
f4eedc6b
DD
347 # push the starting directory
348 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
abfdd623 349
f4eedc6b
DD
350 while (defined $SE) {
351 unless ($bydepth) {
352 $dir= $p_dir; # $File::Find::dir
353 $name= $dir_name; # $File::Find::name
354 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
355 # prune may happen here
356 $prune= 0;
357 { $wanted_callback->() }; # protect against wild "next"
358 next if $prune;
359 }
abfdd623 360
f4eedc6b
DD
361 # change to that directory
362 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
363 my $udir= $dir_rel;
364 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
365 ( $udir ) = $dir_rel =~ m|$untaint_pat|;
366 unless (defined $udir) {
367 if ($untaint_skip == 0) {
368 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
369 } else { # $untaint_skip == 1
370 next;
371 }
372 }
373 }
374 unless (chdir ($Is_VMS && $udir !~ /[\/\[<]+/ ? "./$udir" : $udir)) {
375 warnings::warnif "Can't cd to (" .
376 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
377 next;
378 }
379 $CdLvl++;
380 }
abfdd623 381
f4eedc6b 382 $dir= $dir_name; # $File::Find::dir
abfdd623 383
f4eedc6b
DD
384 # Get the list of files in the current directory.
385 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
386 warnings::warnif "Can't opendir($dir_name): $!\n";
387 next;
388 }
389 @filenames = readdir DIR;
390 closedir(DIR);
391 @filenames = $pre_process->(@filenames) if $pre_process;
392 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
abfdd623 393
f4eedc6b
DD
394 # default: use whatever was specified
395 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
396 $no_nlink = $avoid_nlink;
397 # if dir has wrong nlink count, force switch to slower stat method
398 $no_nlink = 1 if ($nlink < 2);
4c767a4e 399
f4eedc6b
DD
400 if ($nlink == 2 && !$no_nlink) {
401 # This dir has no subdirectories.
402 for my $FN (@filenames) {
403 if ($Is_VMS) {
404 # Big hammer here - Compensate for VMS trailing . and .dir
405 # No win situation until this is changed, but this
406 # will handle the majority of the cases with breaking the fewest
abfdd623 407
f4eedc6b
DD
408 $FN =~ s/\.dir\z//i;
409 $FN =~ s#\.$## if ($FN ne '.');
410 }
411 next if $FN =~ $File::Find::skip_pattern;
412
413 $name = $dir_pref . $FN; # $File::Find::name
414 $_ = ($no_chdir ? $name : $FN); # $_
415 { $wanted_callback->() }; # protect against wild "next"
416 }
abfdd623 417
f4eedc6b
DD
418 }
419 else {
420 # This dir has subdirectories.
421 $subcount = $nlink - 2;
5cf0a2f2 422
f4eedc6b
DD
423 # HACK: insert directories at this position. so as to preserve
424 # the user pre-processed ordering of files.
425 # EG: directory traversal is in user sorted order, not at random.
426 my $stack_top = @Stack;
abfdd623 427
f4eedc6b
DD
428 for my $FN (@filenames) {
429 next if $FN =~ $File::Find::skip_pattern;
430 if ($subcount > 0 || $no_nlink) {
431 # Seen all the subdirs?
432 # check for directoriness.
433 # stat is faster for a file in the current directory
434 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
5cf0a2f2 435
f4eedc6b
DD
436 if (-d _) {
437 --$subcount;
438 $FN =~ s/\.dir\z//i if $Is_VMS;
439 # HACK: replace push to preserve dir traversal order
440 #push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
441 splice @Stack, $stack_top, 0,
442 [$CdLvl,$dir_name,$FN,$sub_nlink];
443 }
444 else {
445 $name = $dir_pref . $FN; # $File::Find::name
446 $_= ($no_chdir ? $name : $FN); # $_
447 { $wanted_callback->() }; # protect against wild "next"
448 }
449 }
450 else {
451 $name = $dir_pref . $FN; # $File::Find::name
452 $_= ($no_chdir ? $name : $FN); # $_
453 { $wanted_callback->() }; # protect against wild "next"
454 }
455 }
456 }
457 }
458 continue {
459 while ( defined ($SE = pop @Stack) ) {
460 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
461 if ($CdLvl > $Level && !$no_chdir) {
462 my $tmp;
463 if ($Is_VMS) {
464 $tmp = '[' . ('-' x ($CdLvl-$Level)) . ']';
465 }
466 else {
467 $tmp = join('/',('..') x ($CdLvl-$Level));
468 }
469 die "Can't cd to $tmp from $dir_name: $!"
470 unless chdir ($tmp);
471 $CdLvl = $Level;
472 }
5cf0a2f2 473
f4eedc6b
DD
474 if ($Is_Win32) {
475 $dir_name = ($p_dir =~ m{^(?:\w:[/\\]?|[/\\])$}
476 ? "$p_dir$dir_rel" : "$p_dir/$dir_rel");
477 $dir_pref = "$dir_name/";
478 }
479 elsif ($^O eq 'VMS') {
480 if ($p_dir =~ m/[\]>]+$/) {
481 $dir_name = $p_dir;
482 $dir_name =~ s/([\]>]+)$/.$dir_rel$1/;
483 $dir_pref = $dir_name;
484 }
485 else {
486 $dir_name = "$p_dir/$dir_rel";
487 $dir_pref = "$dir_name/";
488 }
489 }
490 else {
491 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
492 $dir_pref = "$dir_name/";
493 }
47a735e8 494
f4eedc6b
DD
495 if ( $nlink == -2 ) {
496 $name = $dir = $p_dir; # $File::Find::name / dir
497 $_ = $File::Find::current_dir;
498 $post_process->(); # End-of-directory processing
499 }
500 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
501 $name = $dir_name;
502 if ( substr($name,-2) eq '/.' ) {
503 substr($name, length($name) == 2 ? -1 : -2) = '';
504 }
505 $dir = $p_dir;
506 $_ = ($no_chdir ? $dir_name : $dir_rel );
507 if ( substr($_,-2) eq '/.' ) {
508 substr($_, length($_) == 2 ? -1 : -2) = '';
509 }
510 { $wanted_callback->() }; # protect against wild "next"
511 }
512 else {
513 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
514 last;
515 }
516 }
517 }
518}
f06db76b 519
f06db76b 520
f4eedc6b
DD
521# API:
522# $wanted
523# $dir_loc : absolute location of a dir
524# $p_dir : "parent directory"
525# preconditions:
526# chdir (if not no_chdir) to dir
f06db76b 527
f4eedc6b
DD
528sub _find_dir_symlnk($$$) {
529 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
530 my @Stack;
531 my @filenames;
532 my $new_loc;
533 my $updir_loc = $dir_loc; # untainted parent directory
534 my $SE = [];
535 my $dir_name = $p_dir;
536 my $dir_pref;
537 my $loc_pref;
538 my $dir_rel = $File::Find::current_dir;
539 my $byd_flag; # flag for pending stack entry if $bydepth
540 my $tainted = 0;
541 my $ok = 1;
f06db76b 542
f4eedc6b
DD
543 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
544 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
43dece2a 545
f4eedc6b 546 local ($dir, $name, $fullname, $prune, *DIR);
f06db76b 547
f4eedc6b
DD
548 unless ($no_chdir) {
549 # untaint the topdir
550 if (( $untaint ) && (is_tainted($dir_loc) )) {
551 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
552 # once untainted, $updir_loc is pushed on the stack (as parent directory);
553 # hence, we don't need to untaint the parent directory every time we chdir
554 # to it later
555 unless (defined $updir_loc) {
556 if ($untaint_skip == 0) {
557 die "directory $dir_loc is still tainted";
558 }
559 else {
560 return;
561 }
562 }
563 }
564 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
565 unless ($ok) {
566 warnings::warnif "Can't cd to $updir_loc: $!\n";
567 return;
568 }
237437d0 569 }
f06db76b 570
f4eedc6b 571 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;
c82ae0bc 572
f4eedc6b 573 while (defined $SE) {
c82ae0bc 574
f4eedc6b
DD
575 unless ($bydepth) {
576 # change (back) to parent directory (always untainted)
577 unless ($no_chdir) {
578 unless (chdir $updir_loc) {
579 warnings::warnif "Can't cd to $updir_loc: $!\n";
580 next;
581 }
582 }
583 $dir= $p_dir; # $File::Find::dir
584 $name= $dir_name; # $File::Find::name
585 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
586 $fullname= $dir_loc; # $File::Find::fullname
587 # prune may happen here
588 $prune= 0;
589 lstat($_); # make sure file tests with '_' work
590 { $wanted_callback->() }; # protect against wild "next"
591 next if $prune;
592 }
c82ae0bc 593
f4eedc6b
DD
594 # change to that directory
595 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
596 $updir_loc = $dir_loc;
597 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
598 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
599 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
600 unless (defined $updir_loc) {
601 if ($untaint_skip == 0) {
602 die "directory $dir_loc is still tainted";
603 }
604 else {
605 next;
606 }
607 }
608 }
609 unless (chdir $updir_loc) {
610 warnings::warnif "Can't cd to $updir_loc: $!\n";
611 next;
612 }
613 }
81793b90 614
f4eedc6b 615 $dir = $dir_name; # $File::Find::dir
cd68ec93 616
f4eedc6b
DD
617 # Get the list of files in the current directory.
618 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
619 warnings::warnif "Can't opendir($dir_loc): $!\n";
620 next;
621 }
622 @filenames = readdir DIR;
623 closedir(DIR);
cd68ec93 624
f4eedc6b
DD
625 for my $FN (@filenames) {
626 if ($Is_VMS) {
627 # Big hammer here - Compensate for VMS trailing . and .dir
628 # No win situation until this is changed, but this
629 # will handle the majority of the cases with breaking the fewest.
cd68ec93 630
f4eedc6b
DD
631 $FN =~ s/\.dir\z//i;
632 $FN =~ s#\.$## if ($FN ne '.');
633 }
634 next if $FN =~ $File::Find::skip_pattern;
cd68ec93 635
f4eedc6b
DD
636 # follow symbolic links / do an lstat
637 $new_loc = Follow_SymLink($loc_pref.$FN);
81793b90 638
f4eedc6b
DD
639 # ignore if invalid symlink
640 unless (defined $new_loc) {
641 if (!defined -l _ && $dangling_symlinks) {
642 $fullname = undef;
643 if (ref $dangling_symlinks eq 'CODE') {
644 $dangling_symlinks->($FN, $dir_pref);
645 } else {
646 warnings::warnif "$dir_pref$FN is a dangling symbolic link\n";
647 }
648 }
649 else {
650 $fullname = $loc_pref . $FN;
651 }
652 $name = $dir_pref . $FN;
653 $_ = ($no_chdir ? $name : $FN);
654 { $wanted_callback->() };
655 next;
656 }
5fa2bf2b 657
f4eedc6b
DD
658 if (-d _) {
659 if ($Is_VMS) {
660 $FN =~ s/\.dir\z//i;
661 $FN =~ s#\.$## if ($FN ne '.');
662 $new_loc =~ s/\.dir\z//i;
663 $new_loc =~ s#\.$## if ($new_loc ne '.');
664 }
665 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
666 }
667 else {
668 $fullname = $new_loc; # $File::Find::fullname
669 $name = $dir_pref . $FN; # $File::Find::name
670 $_ = ($no_chdir ? $name : $FN); # $_
671 { $wanted_callback->() }; # protect against wild "next"
672 }
673 }
5fa2bf2b 674
f4eedc6b
DD
675 }
676 continue {
677 while (defined($SE = pop @Stack)) {
678 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
679 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
680 $dir_pref = "$dir_name/";
681 $loc_pref = "$dir_loc/";
682 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
683 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
684 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
685 warnings::warnif "Can't cd to $updir_loc: $!\n";
686 next;
687 }
688 }
689 $fullname = $dir_loc; # $File::Find::fullname
690 $name = $dir_name; # $File::Find::name
691 if ( substr($name,-2) eq '/.' ) {
692 substr($name, length($name) == 2 ? -1 : -2) = ''; # $File::Find::name
693 }
694 $dir = $p_dir; # $File::Find::dir
695 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
696 if ( substr($_,-2) eq '/.' ) {
697 substr($_, length($_) == 2 ? -1 : -2) = '';
698 }
5fa2bf2b 699
f4eedc6b
DD
700 lstat($_); # make sure file tests with '_' work
701 { $wanted_callback->() }; # protect against wild "next"
702 }
703 else {
704 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
705 last;
706 }
707 }
708 }
709}
5fa2bf2b 710
5fa2bf2b 711
f4eedc6b
DD
712sub wrap_wanted {
713 my $wanted = shift;
714 if ( ref($wanted) eq 'HASH' ) {
715 # RT #122547
716 my %valid_options = map {$_ => 1} qw(
717 wanted
718 bydepth
719 preprocess
720 postprocess
721 follow
722 follow_fast
723 follow_skip
724 dangling_symlinks
725 no_chdir
726 untaint
727 untaint_pattern
728 untaint_skip
729 );
730 my @invalid_options = ();
731 for my $v (keys %{$wanted}) {
732 push @invalid_options, $v unless exists $valid_options{$v};
733 }
734 warn "Invalid option(s): @invalid_options" if @invalid_options;
735
736 unless( exists $wanted->{wanted} and ref( $wanted->{wanted} ) eq 'CODE' ) {
737 die 'no &wanted subroutine given';
738 }
739 if ( $wanted->{follow} || $wanted->{follow_fast}) {
740 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
741 }
742 if ( $wanted->{untaint} ) {
743 $wanted->{untaint_pattern} = $File::Find::untaint_pattern
744 unless defined $wanted->{untaint_pattern};
745 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
746 }
747 return $wanted;
748 }
749 elsif( ref( $wanted ) eq 'CODE' ) {
750 return { wanted => $wanted };
751 }
752 else {
753 die 'no &wanted subroutine given';
754 }
755}
5fa2bf2b 756
f4eedc6b
DD
757sub find {
758 my $wanted = shift;
759 _find_opt(wrap_wanted($wanted), @_);
760}
0530a6c4 761
f4eedc6b
DD
762sub finddepth {
763 my $wanted = wrap_wanted(shift);
764 $wanted->{bydepth} = 1;
765 _find_opt($wanted, @_);
766}
5fa2bf2b 767
f4eedc6b
DD
768# default
769$File::Find::skip_pattern = qr/^\.{1,2}\z/;
770$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
6eb87ff8 771
f4eedc6b
DD
772# These are hard-coded for now, but may move to hint files.
773if ($^O eq 'VMS') {
774 $Is_VMS = 1;
775 $File::Find::dont_use_nlink = 1;
776}
777elsif ($^O eq 'MSWin32') {
778 $Is_Win32 = 1;
779}
6eb87ff8 780
f4eedc6b
DD
781# this _should_ work properly on all platforms
782# where File::Find can be expected to work
783$File::Find::current_dir = File::Spec->curdir || '.';
a85af077 784
f4eedc6b
DD
785$File::Find::dont_use_nlink = 1
786 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $Is_Win32 ||
787 $^O eq 'interix' || $^O eq 'cygwin' || $^O eq 'qnx' || $^O eq 'nto';
a85af077 788
f4eedc6b
DD
789# Set dont_use_nlink in your hint file if your system's stat doesn't
790# report the number of links in a directory as an indication
791# of the number of files.
792# See, e.g. hints/machten.sh for MachTen 2.2.
793unless ($File::Find::dont_use_nlink) {
794 require Config;
795 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
796}
9b33fb8e 797
f4eedc6b
DD
798# We need a function that checks if a scalar is tainted. Either use the
799# Scalar::Util module's tainted() function or our (slower) pure Perl
800# fallback is_tainted_pp()
801{
802 local $@;
803 eval { require Scalar::Util };
804 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
805}
9b33fb8e 806
f4eedc6b 8071;
f06db76b 808
f4eedc6b
DD
809__END__
810#
811# Modified to ensure sub-directory traversal order is not inverted by stack
812# push and pops. That is remains in the same order as in the directory file,
813# or user pre-processing (EG:sorted).
814#
6280b799 815
f4eedc6b 816=head1 NAME
a0d0e21e 817
f4eedc6b 818File::Find - Traverse a directory tree.
81793b90 819
f4eedc6b 820=head1 SYNOPSIS
81793b90 821
f4eedc6b
DD
822 use File::Find;
823 find(\&wanted, @directories_to_search);
824 sub wanted { ... }
9f826d6a 825
f4eedc6b
DD
826 use File::Find;
827 finddepth(\&wanted, @directories_to_search);
828 sub wanted { ... }
81793b90 829
f4eedc6b
DD
830 use File::Find;
831 find({ wanted => \&process, follow => 1 }, '.');
81793b90 832
f4eedc6b 833=head1 DESCRIPTION
81793b90 834
f4eedc6b
DD
835These are functions for searching through directory trees doing work
836on each file found similar to the Unix I<find> command. File::Find
837exports two functions, C<find> and C<finddepth>. They work similarly
838but have subtle differences.
81793b90 839
f4eedc6b 840=over 4
81793b90 841
f4eedc6b 842=item B<find>
81793b90 843
f4eedc6b
DD
844 find(\&wanted, @directories);
845 find(\%options, @directories);
81793b90 846
f4eedc6b
DD
847C<find()> does a depth-first search over the given C<@directories> in
848the order they are given. For each file or directory found, it calls
849the C<&wanted> subroutine. (See below for details on how to use the
850C<&wanted> function). Additionally, for each directory found, it will
851C<chdir()> into that directory and continue the search, invoking the
852C<&wanted> function on each file or subdirectory in the directory.
81793b90 853
f4eedc6b 854=item B<finddepth>
81793b90 855
f4eedc6b
DD
856 finddepth(\&wanted, @directories);
857 finddepth(\%options, @directories);
81793b90 858
f4eedc6b
DD
859C<finddepth()> works just like C<find()> except that it invokes the
860C<&wanted> function for a directory I<after> invoking it for the
861directory's contents. It does a postorder traversal instead of a
862preorder traversal, working from the bottom of the directory tree up
863where C<find()> works from the top of the tree down.
81793b90 864
f4eedc6b 865=back
81793b90 866
f4eedc6b 867=head2 %options
81793b90 868
f4eedc6b
DD
869The first argument to C<find()> is either a code reference to your
870C<&wanted> function, or a hash reference describing the operations
871to be performed for each file. The
872code reference is described in L<The wanted function> below.
81793b90 873
f4eedc6b 874Here are the possible keys for the hash:
81793b90 875
f4eedc6b 876=over 3
81793b90 877
f4eedc6b 878=item C<wanted>
81793b90 879
f4eedc6b
DD
880The value should be a code reference. This code reference is
881described in L<The wanted function> below. The C<&wanted> subroutine is
882mandatory.
7e47e6ff 883
f4eedc6b 884=item C<bydepth>
81793b90 885
f4eedc6b
DD
886Reports the name of a directory only AFTER all its entries
887have been reported. Entry point C<finddepth()> is a shortcut for
888specifying C<< { bydepth => 1 } >> in the first argument of C<find()>.
9f826d6a 889
f4eedc6b 890=item C<preprocess>
9f826d6a 891
f4eedc6b
DD
892The value should be a code reference. This code reference is used to
893preprocess the current directory. The name of the currently processed
894directory is in C<$File::Find::dir>. Your preprocessing function is
895called after C<readdir()>, but before the loop that calls the C<wanted()>
896function. It is called with a list of strings (actually file/directory
897names) and is expected to return a list of strings. The code can be
898used to sort the file/directory names alphabetically, numerically,
899or to filter out directory entries based on their name alone. When
900I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
a1ccf0c4 901
f4eedc6b 902=item C<postprocess>
a1ccf0c4 903
f4eedc6b
DD
904The value should be a code reference. It is invoked just before leaving
905the currently processed directory. It is called in void context with no
906arguments. The name of the current directory is in C<$File::Find::dir>. This
907hook is handy for summarizing a directory, such as calculating its disk
908usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
909no-op.
81793b90 910
f4eedc6b 911=item C<follow>
81793b90 912
f4eedc6b
DD
913Causes symbolic links to be followed. Since directory trees with symbolic
914links (followed) may contain files more than once and may even have
915cycles, a hash has to be built up with an entry for each file.
916This might be expensive both in space and time for a large
917directory tree. See L</follow_fast> and L</follow_skip> below.
918If either I<follow> or I<follow_fast> is in effect:
3555aed3 919
f4eedc6b 920=over 6
81793b90 921
f4eedc6b 922=item *
7e47e6ff 923
f4eedc6b
DD
924It is guaranteed that an I<lstat> has been called before the user's
925C<wanted()> function is called. This enables fast file checks involving S<_>.
926Note that this guarantee no longer holds if I<follow> or I<follow_fast>
927are not set.
3555aed3 928
f4eedc6b 929=item *
7e47e6ff 930
f4eedc6b
DD
931There is a variable C<$File::Find::fullname> which holds the absolute
932pathname of the file with all symbolic links resolved. If the link is
933a dangling symbolic link, then fullname will be set to C<undef>.
7e47e6ff 934
f4eedc6b 935=back
7e47e6ff 936
f4eedc6b 937This is a no-op on Win32.
7e47e6ff 938
f4eedc6b 939=item C<follow_fast>
81793b90 940
f4eedc6b
DD
941This is similar to I<follow> except that it may report some files more
942than once. It does detect cycles, however. Since only symbolic links
943have to be hashed, this is much cheaper both in space and time. If
944processing a file more than once (by the user's C<wanted()> function)
945is worse than just taking time, the option I<follow> should be used.
81793b90 946
f4eedc6b 947This is also a no-op on Win32.
81793b90 948
f4eedc6b 949=item C<follow_skip>
719911cc 950
f4eedc6b
DD
951C<follow_skip==1>, which is the default, causes all files which are
952neither directories nor symbolic links to be ignored if they are about
953to be processed a second time. If a directory or a symbolic link
954are about to be processed a second time, File::Find dies.
719911cc 955
f4eedc6b
DD
956C<follow_skip==0> causes File::Find to die if any file is about to be
957processed a second time.
81793b90 958
f4eedc6b
DD
959C<follow_skip==2> causes File::Find to ignore any duplicate files and
960directories but to proceed normally otherwise.
81793b90 961
f4eedc6b 962=item C<dangling_symlinks>
81793b90 963
f4eedc6b
DD
964If true and a code reference, will be called with the symbolic link
965name and the directory it lives in as arguments. Otherwise, if true
966and warnings are on, warning "symbolic_link_name is a dangling
967symbolic link\n" will be issued. If false, the dangling symbolic link
968will be silently ignored.
81793b90 969
f4eedc6b 970=item C<no_chdir>
7e47e6ff 971
f4eedc6b
DD
972Does not C<chdir()> to each directory as it recurses. The C<wanted()>
973function will need to be aware of this, of course. In this case,
974C<$_> will be the same as C<$File::Find::name>.
a1ccf0c4 975
f4eedc6b 976=item C<untaint>
a1ccf0c4 977
f4eedc6b
DD
978If find is used in taint-mode (-T command line switch or if EUID != UID
979or if EGID != GID) then internally directory names have to be untainted
980before they can be chdir'ed to. Therefore they are checked against a regular
981expression I<untaint_pattern>. Note that all names passed to the user's
982I<wanted()> function are still tainted. If this option is used while
983not in taint-mode, C<untaint> is a no-op.
a1ccf0c4 984
f4eedc6b 985=item C<untaint_pattern>
81793b90 986
f4eedc6b
DD
987See above. This should be set using the C<qr> quoting operator.
988The default is set to C<qr|^([-+@\w./]+)$|>.
989Note that the parentheses are vital.
7e47e6ff 990
f4eedc6b 991=item C<untaint_skip>
7e47e6ff 992
f4eedc6b
DD
993If set, a directory which fails the I<untaint_pattern> is skipped,
994including all its sub-directories. The default is to 'die' in such a case.
81793b90 995
f4eedc6b
DD
996=back
997
998=head2 The wanted function
999
1000The C<wanted()> function does whatever verifications you want on
1001each file and directory. Note that despite its name, the C<wanted()>
1002function is a generic callback function, and does B<not> tell
1003File::Find if a file is "wanted" or not. In fact, its return value
1004is ignored.
7e47e6ff 1005
f4eedc6b
DD
1006The wanted function takes no arguments but rather does its work
1007through a collection of variables.
81793b90 1008
f4eedc6b 1009=over 4
81793b90 1010
f4eedc6b 1011=item C<$File::Find::dir> is the current directory name,
81793b90 1012
f4eedc6b 1013=item C<$_> is the current filename within that directory
5fa2bf2b 1014
f4eedc6b 1015=item C<$File::Find::name> is the complete pathname to the file.
a1ccf0c4 1016
f4eedc6b 1017=back
81793b90 1018
f4eedc6b
DD
1019The above variables have all been localized and may be changed without
1020affecting data outside of the wanted function.
81793b90 1021
f4eedc6b 1022For example, when examining the file F</some/path/foo.ext> you will have:
7bd31527 1023
f4eedc6b
DD
1024 $File::Find::dir = /some/path/
1025 $_ = foo.ext
1026 $File::Find::name = /some/path/foo.ext
81793b90 1027
f4eedc6b
DD
1028You are chdir()'d to C<$File::Find::dir> when the function is called,
1029unless C<no_chdir> was specified. Note that when changing to
1030directories is in effect the root directory (F</>) is a somewhat
1031special case inasmuch as the concatenation of C<$File::Find::dir>,
1032C<'/'> and C<$_> is not literally equal to C<$File::Find::name>. The
1033table below summarizes all variants:
7e47e6ff 1034
f4eedc6b
DD
1035 $File::Find::name $File::Find::dir $_
1036 default / / .
1037 no_chdir=>0 /etc / etc
1038 /etc/x /etc x
7e47e6ff 1039
f4eedc6b
DD
1040 no_chdir=>1 / / /
1041 /etc / /etc
1042 /etc/x /etc /etc/x
a0d0e21e 1043
81793b90 1044
f4eedc6b
DD
1045When C<follow> or C<follow_fast> are in effect, there is
1046also a C<$File::Find::fullname>. The function may set
1047C<$File::Find::prune> to prune the tree unless C<bydepth> was
1048specified. Unless C<follow> or C<follow_fast> is specified, for
1049compatibility reasons (find.pl, find2perl) there are in addition the
1050following globals available: C<$File::Find::topdir>,
1051C<$File::Find::topdev>, C<$File::Find::topino>,
1052C<$File::Find::topmode> and C<$File::Find::topnlink>.
81793b90 1053
f4eedc6b 1054This library is useful for the C<find2perl> tool, which when fed,
7e47e6ff 1055
f4eedc6b
DD
1056 find2perl / -name .nfs\* -mtime +7 \
1057 -exec rm -f {} \; -o -fstype nfs -prune
81793b90 1058
f4eedc6b 1059produces something like:
7e47e6ff 1060
f4eedc6b
DD
1061 sub wanted {
1062 /^\.nfs.*\z/s &&
1063 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
1064 int(-M _) > 7 &&
1065 unlink($_)
1066 ||
1067 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
1068 $dev < 0 &&
1069 ($File::Find::prune = 1);
81793b90
GS
1070 }
1071
f4eedc6b
DD
1072Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
1073filehandle that caches the information from the preceding
1074C<stat()>, C<lstat()>, or filetest.
7e47e6ff 1075
f4eedc6b
DD
1076Here's another interesting wanted function. It will find all symbolic
1077links that don't resolve:
1078
1079 sub wanted {
1080 -l && !-e && print "bogus link: $File::Find::name\n";
1081 }
1082
1083Note that you may mix directories and (non-directory) files in the list of
1084directories to be searched by the C<wanted()> function.
81793b90 1085
f4eedc6b 1086 find(\&wanted, "./foo", "./bar", "./baz/epsilon");
81793b90 1087
f4eedc6b
DD
1088In the example above, no file in F<./baz/> other than F<./baz/epsilon> will be
1089evaluated by C<wanted()>.
81793b90 1090
f4eedc6b
DD
1091See also the script C<pfind> on CPAN for a nice application of this
1092module.
81793b90 1093
f4eedc6b 1094=head1 WARNINGS
81793b90 1095
f4eedc6b
DD
1096If you run your program with the C<-w> switch, or if you use the
1097C<warnings> pragma, File::Find will report warnings for several weird
1098situations. You can disable these warnings by putting the statement
a1ccf0c4 1099
f4eedc6b 1100 no warnings 'File::Find';
81793b90 1101
f4eedc6b
DD
1102in the appropriate scope. See L<warnings> for more info about lexical
1103warnings.
81793b90 1104
f4eedc6b 1105=head1 CAVEAT
7e47e6ff 1106
f4eedc6b 1107=over 2
81793b90 1108
f4eedc6b 1109=item $dont_use_nlink
7e47e6ff 1110
f4eedc6b
DD
1111You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
1112force File::Find to always stat directories. This was used for file systems
1113that do not have an C<nlink> count matching the number of sub-directories.
1114Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
1115system) and a couple of others.
a0d0e21e 1116
f4eedc6b
DD
1117You shouldn't need to set this variable, since File::Find should now detect
1118such file systems on-the-fly and switch itself to using stat. This works even
1119for parts of your file system, like a mounted CD-ROM.
81793b90 1120
f4eedc6b 1121If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
233a4069 1122
f4eedc6b 1123=item symlinks
a0d0e21e 1124
f4eedc6b
DD
1125Be aware that the option to follow symbolic links can be dangerous.
1126Depending on the structure of the directory tree (including symbolic
1127links to directories) you might traverse a given (physical) directory
1128more than once (only if C<follow_fast> is in effect).
1129Furthermore, deleting or changing files in a symbolically linked directory
1130might cause very unpleasant surprises, since you delete or change files
1131in an unknown directory.
a0d0e21e 1132
f4eedc6b 1133=back
6280b799 1134
f4eedc6b 1135=head1 BUGS AND CAVEATS
7e47e6ff 1136
f4eedc6b
DD
1137Despite the name of the C<finddepth()> function, both C<find()> and
1138C<finddepth()> perform a depth-first search of the directory
1139hierarchy.
748a9306 1140
f4eedc6b 1141=head1 HISTORY
7e47e6ff 1142
f4eedc6b
DD
1143File::Find used to produce incorrect results if called recursively.
1144During the development of perl 5.8 this bug was fixed.
1145The first fixed version of File::Find was 1.01.
6280b799 1146
f4eedc6b 1147=head1 SEE ALSO
20408e3c 1148
f4eedc6b 1149find, find2perl.
7e47e6ff 1150
f4eedc6b 1151=cut