This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [PATCH] ID 20011206.007 File::Find:name set to empty string for "/"
[perl5.git] / lib / File / Find.pm
... / ...
CommitLineData
1package File::Find;
2use 5.006;
3use strict;
4use warnings;
5use warnings::register;
6our $VERSION = '1.04';
7require Exporter;
8require Cwd;
9
10=head1 NAME
11
12find - traverse a file tree
13
14finddepth - traverse a directory structure depth-first
15
16=head1 SYNOPSIS
17
18 use File::Find;
19 find(\&wanted, '/foo', '/bar');
20 sub wanted { ... }
21
22 use File::Find;
23 finddepth(\&wanted, '/foo', '/bar');
24 sub wanted { ... }
25
26 use File::Find;
27 find({ wanted => \&process, follow => 1 }, '.');
28
29=head1 DESCRIPTION
30
31The first argument to find() is either a hash reference describing the
32operations to be performed for each file, or a code reference.
33
34Here are the possible keys for the hash:
35
36=over 3
37
38=item C<wanted>
39
40The value should be a code reference. This code reference is called
41I<the wanted() function> below.
42
43=item C<bydepth>
44
45Reports the name of a directory only AFTER all its entries
46have been reported. Entry point finddepth() is a shortcut for
47specifying C<{ bydepth => 1 }> in the first argument of find().
48
49=item C<preprocess>
50
51The value should be a code reference. This code reference is used to
52preprocess the current directory. The name of currently processed
53directory is in $File::Find::dir. Your preprocessing function is
54called after readdir() but before the loop that calls the wanted()
55function. It is called with a list of strings (actually file/directory
56names) and is expected to return a list of strings. The code can be
57used to sort the file/directory names alphabetically, numerically,
58or to filter out directory entries based on their name alone. When
59I<follow> or I<follow_fast> are in effect, C<preprocess> is a no-op.
60
61=item C<postprocess>
62
63The value should be a code reference. It is invoked just before leaving
64the currently processed directory. It is called in void context with no
65arguments. The name of the current directory is in $File::Find::dir. This
66hook is handy for summarizing a directory, such as calculating its disk
67usage. When I<follow> or I<follow_fast> are in effect, C<postprocess> is a
68no-op.
69
70=item C<follow>
71
72Causes symbolic links to be followed. Since directory trees with symbolic
73links (followed) may contain files more than once and may even have
74cycles, a hash has to be built up with an entry for each file.
75This might be expensive both in space and time for a large
76directory tree. See I<follow_fast> and I<follow_skip> below.
77If either I<follow> or I<follow_fast> is in effect:
78
79=over 6
80
81=item *
82
83It is guaranteed that an I<lstat> has been called before the user's
84I<wanted()> function is called. This enables fast file checks involving S< _>.
85
86=item *
87
88There is a variable C<$File::Find::fullname> which holds the absolute
89pathname of the file with all symbolic links resolved
90
91=back
92
93=item C<follow_fast>
94
95This is similar to I<follow> except that it may report some files more
96than once. It does detect cycles, however. Since only symbolic links
97have to be hashed, this is much cheaper both in space and time. If
98processing a file more than once (by the user's I<wanted()> function)
99is worse than just taking time, the option I<follow> should be used.
100
101=item C<follow_skip>
102
103C<follow_skip==1>, which is the default, causes all files which are
104neither directories nor symbolic links to be ignored if they are about
105to be processed a second time. If a directory or a symbolic link
106are about to be processed a second time, File::Find dies.
107C<follow_skip==0> causes File::Find to die if any file is about to be
108processed a second time.
109C<follow_skip==2> causes File::Find to ignore any duplicate files and
110directories but to proceed normally otherwise.
111
112=item C<dangling_symlinks>
113
114If true and a code reference, will be called with the symbolic link
115name and the directory it lives in as arguments. Otherwise, if true
116and warnings are on, warning "symbolic_link_name is a dangling
117symbolic link\n" will be issued. If false, the dangling symbolic link
118will be silently ignored.
119
120=item C<no_chdir>
121
122Does not C<chdir()> to each directory as it recurses. The wanted()
123function will need to be aware of this, of course. In this case,
124C<$_> will be the same as C<$File::Find::name>.
125
126=item C<untaint>
127
128If find is used in taint-mode (-T command line switch or if EUID != UID
129or if EGID != GID) then internally directory names have to be untainted
130before they can be chdir'ed to. Therefore they are checked against a regular
131expression I<untaint_pattern>. Note that all names passed to the user's
132I<wanted()> function are still tainted. If this option is used while
133not in taint-mode, C<untaint> is a no-op.
134
135=item C<untaint_pattern>
136
137See above. This should be set using the C<qr> quoting operator.
138The default is set to C<qr|^([-+@\w./]+)$|>.
139Note that the parentheses are vital.
140
141=item C<untaint_skip>
142
143If set, a directory which fails the I<untaint_pattern> is skipped,
144including all its sub-directories. The default is to 'die' in such a case.
145
146=back
147
148The wanted() function does whatever verifications you want.
149C<$File::Find::dir> contains the current directory name, and C<$_> the
150current filename within that directory. C<$File::Find::name> contains
151the complete pathname to the file. You are chdir()'d to
152C<$File::Find::dir> when the function is called, unless C<no_chdir>
153was specified. Note that when changing to directories is in effect
154the root directory (F</>) is a somewhat special case inasmuch as the
155concatenation of C<$File::Find::dir>, C<'/'> and C<$_> is not literally
156equal to C<$File::Find::name>. The table below summarizes all variants:
157
158 $File::Find::name $File::Find::dir $_
159 default / / .
160 no_chdir=>0 /etc / etc
161 /etc/x /etc x
162
163 no_chdir=>1 / / /
164 /etc / /etc
165 /etc/x /etc /etc/x
166
167
168When <follow> or <follow_fast> are in effect, there is
169also a C<$File::Find::fullname>. The function may set
170C<$File::Find::prune> to prune the tree unless C<bydepth> was
171specified. Unless C<follow> or C<follow_fast> is specified, for
172compatibility reasons (find.pl, find2perl) there are in addition the
173following globals available: C<$File::Find::topdir>,
174C<$File::Find::topdev>, C<$File::Find::topino>,
175C<$File::Find::topmode> and C<$File::Find::topnlink>.
176
177This library is useful for the C<find2perl> tool, which when fed,
178
179 find2perl / -name .nfs\* -mtime +7 \
180 -exec rm -f {} \; -o -fstype nfs -prune
181
182produces something like:
183
184 sub wanted {
185 /^\.nfs.*\z/s &&
186 (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
187 int(-M _) > 7 &&
188 unlink($_)
189 ||
190 ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
191 $dev < 0 &&
192 ($File::Find::prune = 1);
193 }
194
195Notice the C<_> in the above C<int(-M _)>: the C<_> is a magical
196filehandle that caches the information from the preceding
197stat(), lstat(), or filetest.
198
199Here's another interesting wanted function. It will find all symbolic
200links that don't resolve:
201
202 sub wanted {
203 -l && !-e && print "bogus link: $File::Find::name\n";
204 }
205
206See also the script C<pfind> on CPAN for a nice application of this
207module.
208
209=head1 WARNINGS
210
211If you run your program with the C<-w> switch, or if you use the
212C<warnings> pragma, File::Find will report warnings for several weird
213situations. You can disable these warnings by putting the statement
214
215 no warnings 'File::Find';
216
217in the appropriate scope. See L<perllexwarn> for more info about lexical
218warnings.
219
220=head1 CAVEAT
221
222=over 2
223
224=item $dont_use_nlink
225
226You can set the variable C<$File::Find::dont_use_nlink> to 1, if you want to
227force File::Find to always stat directories. This was used for file systems
228that do not have an C<nlink> count matching the number of sub-directories.
229Examples are ISO-9660 (CD-ROM), AFS, HPFS (OS/2 file system), FAT (DOS file
230system) and a couple of others.
231
232You shouldn't need to set this variable, since File::Find should now detect
233such file systems on-the-fly and switch itself to using stat. This works even
234for parts of your file system, like a mounted CD-ROM.
235
236If you do set C<$File::Find::dont_use_nlink> to 1, you will notice slow-downs.
237
238=item symlinks
239
240Be aware that the option to follow symbolic links can be dangerous.
241Depending on the structure of the directory tree (including symbolic
242links to directories) you might traverse a given (physical) directory
243more than once (only if C<follow_fast> is in effect).
244Furthermore, deleting or changing files in a symbolically linked directory
245might cause very unpleasant surprises, since you delete or change files
246in an unknown directory.
247
248=back
249
250=head1 NOTES
251
252=over 4
253
254=item *
255
256Mac OS (Classic) users should note a few differences:
257
258=over 4
259
260=item *
261
262The path separator is ':', not '/', and the current directory is denoted
263as ':', not '.'. You should be careful about specifying relative pathnames.
264While a full path always begins with a volume name, a relative pathname
265should always begin with a ':'. If specifying a volume name only, a
266trailing ':' is required.
267
268=item *
269
270C<$File::Find::dir> is guaranteed to end with a ':'. If C<$_>
271contains the name of a directory, that name may or may not end with a
272':'. Likewise, C<$File::Find::name>, which contains the complete
273pathname to that directory, and C<$File::Find::fullname>, which holds
274the absolute pathname of that directory with all symbolic links resolved,
275may or may not end with a ':'.
276
277=item *
278
279The default C<untaint_pattern> (see above) on Mac OS is set to
280C<qr|^(.+)$|>. Note that the parentheses are vital.
281
282=item *
283
284The invisible system file "Icon\015" is ignored. While this file may
285appear in every directory, there are some more invisible system files
286on every volume, which are all located at the volume root level (i.e.
287"MacintoshHD:"). These system files are B<not> excluded automatically.
288Your filter may use the following code to recognize invisible files or
289directories (requires Mac::Files):
290
291 use Mac::Files;
292
293 # invisible() -- returns 1 if file/directory is invisible,
294 # 0 if it's visible or undef if an error occurred
295
296 sub invisible($) {
297 my $file = shift;
298 my ($fileCat, $fileInfo);
299 my $invisible_flag = 1 << 14;
300
301 if ( $fileCat = FSpGetCatInfo($file) ) {
302 if ($fileInfo = $fileCat->ioFlFndrInfo() ) {
303 return (($fileInfo->fdFlags & $invisible_flag) && 1);
304 }
305 }
306 return undef;
307 }
308
309Generally, invisible files are system files, unless an odd application
310decides to use invisible files for its own purposes. To distinguish
311such files from system files, you have to look at the B<type> and B<creator>
312file attributes. The MacPerl built-in functions C<GetFileInfo(FILE)> and
313C<SetFileInfo(CREATOR, TYPE, FILES)> offer access to these attributes
314(see MacPerl.pm for details).
315
316Files that appear on the desktop actually reside in an (hidden) directory
317named "Desktop Folder" on the particular disk volume. Note that, although
318all desktop files appear to be on the same "virtual" desktop, each disk
319volume actually maintains its own "Desktop Folder" directory.
320
321=back
322
323=back
324
325=head1 HISTORY
326
327File::Find used to produce incorrect results if called recursively.
328During the development of perl 5.8 this bug was fixed.
329The first fixed version of File::Find was 1.01.
330
331=cut
332
333our @ISA = qw(Exporter);
334our @EXPORT = qw(find finddepth);
335
336
337use strict;
338my $Is_VMS;
339my $Is_MacOS;
340
341require File::Basename;
342require File::Spec;
343
344# Should ideally be my() not our() but local() currently
345# refuses to operate on lexicals
346
347our %SLnkSeen;
348our ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
349 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
350 $pre_process, $post_process, $dangling_symlinks);
351
352sub contract_name {
353 my ($cdir,$fn) = @_;
354
355 return substr($cdir,0,rindex($cdir,'/')) if $fn eq $File::Find::current_dir;
356
357 $cdir = substr($cdir,0,rindex($cdir,'/')+1);
358
359 $fn =~ s|^\./||;
360
361 my $abs_name= $cdir . $fn;
362
363 if (substr($fn,0,3) eq '../') {
364 1 while $abs_name =~ s!/[^/]*/\.\./!/!;
365 }
366
367 return $abs_name;
368}
369
370# return the absolute name of a directory or file
371sub contract_name_Mac {
372 my ($cdir,$fn) = @_;
373 my $abs_name;
374
375 if ($fn =~ /^(:+)(.*)$/) { # valid pathname starting with a ':'
376
377 my $colon_count = length ($1);
378 if ($colon_count == 1) {
379 $abs_name = $cdir . $2;
380 return $abs_name;
381 }
382 else {
383 # need to move up the tree, but
384 # only if it's not a volume name
385 for (my $i=1; $i<$colon_count; $i++) {
386 unless ($cdir =~ /^[^:]+:$/) { # volume name
387 $cdir =~ s/[^:]+:$//;
388 }
389 else {
390 return undef;
391 }
392 }
393 $abs_name = $cdir . $2;
394 return $abs_name;
395 }
396
397 }
398 else {
399
400 # $fn may be a valid path to a directory or file or (dangling)
401 # symlink, without a leading ':'
402 if ( (-e $fn) || (-l $fn) ) {
403 if ($fn =~ /^[^:]+:/) { # a volume name like DataHD:*
404 return $fn; # $fn is already an absolute path
405 }
406 else {
407 $abs_name = $cdir . $fn;
408 return $abs_name;
409 }
410 }
411 else { # argh!, $fn is not a valid directory/file
412 return undef;
413 }
414 }
415}
416
417sub PathCombine($$) {
418 my ($Base,$Name) = @_;
419 my $AbsName;
420
421 if ($Is_MacOS) {
422 # $Name is the resolved symlink (always a full path on MacOS),
423 # i.e. there's no need to call contract_name_Mac()
424 $AbsName = $Name;
425
426 # (simple) check for recursion
427 if ( ( $Base =~ /^$AbsName/) && (-d $AbsName) ) { # recursion
428 return undef;
429 }
430 }
431 else {
432 if (substr($Name,0,1) eq '/') {
433 $AbsName= $Name;
434 }
435 else {
436 $AbsName= contract_name($Base,$Name);
437 }
438
439 # (simple) check for recursion
440 my $newlen= length($AbsName);
441 if ($newlen <= length($Base)) {
442 if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
443 && $AbsName eq substr($Base,0,$newlen))
444 {
445 return undef;
446 }
447 }
448 }
449 return $AbsName;
450}
451
452sub Follow_SymLink($) {
453 my ($AbsName) = @_;
454
455 my ($NewName,$DEV, $INO);
456 ($DEV, $INO)= lstat $AbsName;
457
458 while (-l _) {
459 if ($SLnkSeen{$DEV, $INO}++) {
460 if ($follow_skip < 2) {
461 die "$AbsName is encountered a second time";
462 }
463 else {
464 return undef;
465 }
466 }
467 $NewName= PathCombine($AbsName, readlink($AbsName));
468 unless(defined $NewName) {
469 if ($follow_skip < 2) {
470 die "$AbsName is a recursive symbolic link";
471 }
472 else {
473 return undef;
474 }
475 }
476 else {
477 $AbsName= $NewName;
478 }
479 ($DEV, $INO) = lstat($AbsName);
480 return undef unless defined $DEV; # dangling symbolic link
481 }
482
483 if ($full_check && defined $DEV && $SLnkSeen{$DEV, $INO}++) {
484 if ( ($follow_skip < 1) || ((-d _) && ($follow_skip < 2)) ) {
485 die "$AbsName encountered a second time";
486 }
487 else {
488 return undef;
489 }
490 }
491
492 return $AbsName;
493}
494
495our($dir, $name, $fullname, $prune);
496sub _find_dir_symlnk($$$);
497sub _find_dir($$$);
498
499# check whether or not a scalar variable is tainted
500# (code straight from the Camel, 3rd ed., page 561)
501sub is_tainted_pp {
502 my $arg = shift;
503 my $nada = substr($arg, 0, 0); # zero-length
504 local $@;
505 eval { eval "# $nada" };
506 return length($@) != 0;
507}
508
509sub _find_opt {
510 my $wanted = shift;
511 die "invalid top directory" unless defined $_[0];
512
513 # This function must local()ize everything because callbacks may
514 # call find() or finddepth()
515
516 local %SLnkSeen;
517 local ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
518 $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat,
519 $pre_process, $post_process, $dangling_symlinks);
520 local($dir, $name, $fullname, $prune);
521
522 my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::getcwd();
523 my $cwd_untainted = $cwd;
524 my $check_t_cwd = 1;
525 $wanted_callback = $wanted->{wanted};
526 $bydepth = $wanted->{bydepth};
527 $pre_process = $wanted->{preprocess};
528 $post_process = $wanted->{postprocess};
529 $no_chdir = $wanted->{no_chdir};
530 $full_check = $wanted->{follow};
531 $follow = $full_check || $wanted->{follow_fast};
532 $follow_skip = $wanted->{follow_skip};
533 $untaint = $wanted->{untaint};
534 $untaint_pat = $wanted->{untaint_pattern};
535 $untaint_skip = $wanted->{untaint_skip};
536 $dangling_symlinks = $wanted->{dangling_symlinks};
537
538 # for compatibility reasons (find.pl, find2perl)
539 local our ($topdir, $topdev, $topino, $topmode, $topnlink);
540
541 # a symbolic link to a directory doesn't increase the link count
542 $avoid_nlink = $follow || $File::Find::dont_use_nlink;
543
544 my ($abs_dir, $Is_Dir);
545
546 Proc_Top_Item:
547 foreach my $TOP (@_) {
548 my $top_item = $TOP;
549
550 if ($Is_MacOS) {
551 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
552 $top_item = ":$top_item"
553 if ( (-d _) && ( $top_item !~ /:/ ) );
554 }
555 else {
556 $top_item =~ s|/\z|| unless $top_item eq '/';
557 ($topdev,$topino,$topmode,$topnlink) = $follow ? stat $top_item : lstat $top_item;
558 }
559
560 $Is_Dir= 0;
561
562 if ($follow) {
563
564 if ($Is_MacOS) {
565 $cwd = "$cwd:" unless ($cwd =~ /:$/); # for safety
566
567 if ($top_item eq $File::Find::current_dir) {
568 # avoid empty name after return to '/'
569 $name = '/' unless length( $name );
570 $abs_dir = $cwd;
571 }
572 else {
573 $abs_dir = contract_name_Mac($cwd, $top_item);
574 unless (defined $abs_dir) {
575 warnings::warnif "Can't determine absolute path for $top_item (No such file or directory)\n";
576 next Proc_Top_Item;
577 }
578 }
579
580 }
581 else {
582 if (substr($top_item,0,1) eq '/') {
583 $abs_dir = $top_item;
584 }
585 elsif ($top_item eq $File::Find::current_dir) {
586 $abs_dir = $cwd;
587 }
588 else { # care about any ../
589 $abs_dir = contract_name("$cwd/",$top_item);
590 }
591 }
592 $abs_dir= Follow_SymLink($abs_dir);
593 unless (defined $abs_dir) {
594 if ($dangling_symlinks) {
595 if (ref $dangling_symlinks eq 'CODE') {
596 $dangling_symlinks->($top_item, $cwd);
597 } else {
598 warnings::warnif "$top_item is a dangling symbolic link\n";
599 }
600 }
601 next Proc_Top_Item;
602 }
603
604 if (-d _) {
605 _find_dir_symlnk($wanted, $abs_dir, $top_item);
606 $Is_Dir= 1;
607 }
608 }
609 else { # no follow
610 $topdir = $top_item;
611 unless (defined $topnlink) {
612 warnings::warnif "Can't stat $top_item: $!\n";
613 next Proc_Top_Item;
614 }
615 if (-d _) {
616 $top_item =~ s/\.dir\z// if $Is_VMS;
617 _find_dir($wanted, $top_item, $topnlink);
618 $Is_Dir= 1;
619 }
620 else {
621 $abs_dir= $top_item;
622 }
623 }
624
625 unless ($Is_Dir) {
626 unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
627 if ($Is_MacOS) {
628 ($dir,$_) = (':', $top_item); # $File::Find::dir, $_
629 }
630 else {
631 ($dir,$_) = ('./', $top_item);
632 }
633 }
634
635 $abs_dir = $dir;
636 if (( $untaint ) && (is_tainted($dir) )) {
637 ( $abs_dir ) = $dir =~ m|$untaint_pat|;
638 unless (defined $abs_dir) {
639 if ($untaint_skip == 0) {
640 die "directory $dir is still tainted";
641 }
642 else {
643 next Proc_Top_Item;
644 }
645 }
646 }
647
648 unless ($no_chdir || chdir $abs_dir) {
649 warnings::warnif "Couldn't chdir $abs_dir: $!\n";
650 next Proc_Top_Item;
651 }
652
653 $name = $abs_dir . $_; # $File::Find::name
654
655 { &$wanted_callback }; # protect against wild "next"
656
657 }
658
659 unless ( $no_chdir ) {
660 if ( ($check_t_cwd) && (($untaint) && (is_tainted($cwd) )) ) {
661 ( $cwd_untainted ) = $cwd =~ m|$untaint_pat|;
662 unless (defined $cwd_untainted) {
663 die "insecure cwd in find(depth)";
664 }
665 $check_t_cwd = 0;
666 }
667 unless (chdir $cwd_untainted) {
668 die "Can't cd to $cwd: $!\n";
669 }
670 }
671 }
672}
673
674# API:
675# $wanted
676# $p_dir : "parent directory"
677# $nlink : what came back from the stat
678# preconditions:
679# chdir (if not no_chdir) to dir
680
681sub _find_dir($$$) {
682 my ($wanted, $p_dir, $nlink) = @_;
683 my ($CdLvl,$Level) = (0,0);
684 my @Stack;
685 my @filenames;
686 my ($subcount,$sub_nlink);
687 my $SE= [];
688 my $dir_name= $p_dir;
689 my $dir_pref;
690 my $dir_rel = $File::Find::current_dir;
691 my $tainted = 0;
692 my $no_nlink;
693
694 if ($Is_MacOS) {
695 $dir_pref= ($p_dir =~ /:$/) ? $p_dir : "$p_dir:"; # preface
696 }
697 else {
698 $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
699 }
700
701 local ($dir, $name, $prune, *DIR);
702
703 unless ( $no_chdir || ($p_dir eq $File::Find::current_dir)) {
704 my $udir = $p_dir;
705 if (( $untaint ) && (is_tainted($p_dir) )) {
706 ( $udir ) = $p_dir =~ m|$untaint_pat|;
707 unless (defined $udir) {
708 if ($untaint_skip == 0) {
709 die "directory $p_dir is still tainted";
710 }
711 else {
712 return;
713 }
714 }
715 }
716 unless (chdir $udir) {
717 warnings::warnif "Can't cd to $udir: $!\n";
718 return;
719 }
720 }
721
722 # push the starting directory
723 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
724
725 if ($Is_MacOS) {
726 $p_dir = $dir_pref; # ensure trailing ':'
727 }
728
729 while (defined $SE) {
730 unless ($bydepth) {
731 $dir= $p_dir; # $File::Find::dir
732 $name= $dir_name; # $File::Find::name
733 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
734 # prune may happen here
735 $prune= 0;
736 # guarantee lstat for directory
737 lstat( $dir_name );
738 { &$wanted_callback }; # protect against wild "next"
739 next if $prune;
740 }
741
742 # change to that directory
743 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
744 my $udir= $dir_rel;
745 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_rel) )) ) {
746 ( $udir ) = $dir_rel =~ m|$untaint_pat|;
747 unless (defined $udir) {
748 if ($untaint_skip == 0) {
749 if ($Is_MacOS) {
750 die "directory ($p_dir) $dir_rel is still tainted";
751 }
752 else {
753 die "directory (" . ($p_dir ne '/' ? $p_dir : '') . "/) $dir_rel is still tainted";
754 }
755 } else { # $untaint_skip == 1
756 next;
757 }
758 }
759 }
760 unless (chdir $udir) {
761 if ($Is_MacOS) {
762 warnings::warnif "Can't cd to ($p_dir) $udir: $!\n";
763 }
764 else {
765 warnings::warnif "Can't cd to (" .
766 ($p_dir ne '/' ? $p_dir : '') . "/) $udir: $!\n";
767 }
768 next;
769 }
770 $CdLvl++;
771 }
772
773 if ($Is_MacOS) {
774 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
775 }
776
777 $dir= $dir_name; # $File::Find::dir
778
779 # Get the list of files in the current directory.
780 unless (opendir DIR, ($no_chdir ? $dir_name : $File::Find::current_dir)) {
781 warnings::warnif "Can't opendir($dir_name): $!\n";
782 next;
783 }
784 @filenames = readdir DIR;
785 closedir(DIR);
786 @filenames = &$pre_process(@filenames) if $pre_process;
787 push @Stack,[$CdLvl,$dir_name,"",-2] if $post_process;
788
789 # default: use whatever was specifid
790 # (if $nlink >= 2, and $avoid_nlink == 0, this will switch back)
791 $no_nlink = $avoid_nlink;
792 # if dir has wrong nlink count, force switch to slower stat method
793 $no_nlink = 1 if ($nlink < 2);
794
795 if ($nlink == 2 && !$no_nlink) {
796 # This dir has no subdirectories.
797 for my $FN (@filenames) {
798 next if $FN =~ $File::Find::skip_pattern;
799
800 $name = $dir_pref . $FN; # $File::Find::name
801 $_ = ($no_chdir ? $name : $FN); # $_
802 { &$wanted_callback }; # protect against wild "next"
803 }
804
805 }
806 else {
807 # This dir has subdirectories.
808 $subcount = $nlink - 2;
809
810 for my $FN (@filenames) {
811 next if $FN =~ $File::Find::skip_pattern;
812 if ($subcount > 0 || $no_nlink) {
813 # Seen all the subdirs?
814 # check for directoriness.
815 # stat is faster for a file in the current directory
816 $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
817
818 if (-d _) {
819 --$subcount;
820 $FN =~ s/\.dir\z// if $Is_VMS;
821 push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
822 }
823 else {
824 $name = $dir_pref . $FN; # $File::Find::name
825 $_= ($no_chdir ? $name : $FN); # $_
826 { &$wanted_callback }; # protect against wild "next"
827 }
828 }
829 else {
830 $name = $dir_pref . $FN; # $File::Find::name
831 $_= ($no_chdir ? $name : $FN); # $_
832 { &$wanted_callback }; # protect against wild "next"
833 }
834 }
835 }
836 }
837 continue {
838 while ( defined ($SE = pop @Stack) ) {
839 ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
840 if ($CdLvl > $Level && !$no_chdir) {
841 my $tmp;
842 if ($Is_MacOS) {
843 $tmp = (':' x ($CdLvl-$Level)) . ':';
844 }
845 else {
846 $tmp = join('/',('..') x ($CdLvl-$Level));
847 }
848 die "Can't cd to $dir_name" . $tmp
849 unless chdir ($tmp);
850 $CdLvl = $Level;
851 }
852
853 if ($Is_MacOS) {
854 # $pdir always has a trailing ':', except for the starting dir,
855 # where $dir_rel eq ':'
856 $dir_name = "$p_dir$dir_rel";
857 $dir_pref = "$dir_name:";
858 }
859 else {
860 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
861 $dir_pref = "$dir_name/";
862 }
863
864 if ( $nlink == -2 ) {
865 $name = $dir = $p_dir; # $File::Find::name / dir
866 $_ = $File::Find::current_dir;
867 &$post_process; # End-of-directory processing
868 }
869 elsif ( $nlink < 0 ) { # must be finddepth, report dirname now
870 $name = $dir_name;
871 if ($Is_MacOS) {
872 if ($dir_rel eq ':') { # must be the top dir, where we started
873 $name =~ s|:$||; # $File::Find::name
874 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
875 }
876 $dir = $p_dir; # $File::Find::dir
877 $_ = ($no_chdir ? $name : $dir_rel); # $_
878 }
879 else {
880 if ( substr($name,-2) eq '/.' ) {
881 substr($name, length($name) == 2 ? -1 : -2) = '';
882 }
883 $dir = $p_dir;
884 $_ = ($no_chdir ? $dir_name : $dir_rel );
885 if ( substr($_,-2) eq '/.' ) {
886 substr($_, length($_) == 2 ? -1 : -2) = '';
887 }
888 }
889 # guarantee lstat at return to directory
890 lstat( $dir_name );
891 { &$wanted_callback }; # protect against wild "next"
892 }
893 else {
894 push @Stack,[$CdLvl,$p_dir,$dir_rel,-1] if $bydepth;
895 last;
896 }
897 }
898 }
899}
900
901
902# API:
903# $wanted
904# $dir_loc : absolute location of a dir
905# $p_dir : "parent directory"
906# preconditions:
907# chdir (if not no_chdir) to dir
908
909sub _find_dir_symlnk($$$) {
910 my ($wanted, $dir_loc, $p_dir) = @_; # $dir_loc is the absolute directory
911 my @Stack;
912 my @filenames;
913 my $new_loc;
914 my $updir_loc = $dir_loc; # untainted parent directory
915 my $SE = [];
916 my $dir_name = $p_dir;
917 my $dir_pref;
918 my $loc_pref;
919 my $dir_rel = $File::Find::current_dir;
920 my $byd_flag; # flag for pending stack entry if $bydepth
921 my $tainted = 0;
922 my $ok = 1;
923
924 if ($Is_MacOS) {
925 $dir_pref = ($p_dir =~ /:$/) ? "$p_dir" : "$p_dir:";
926 $loc_pref = ($dir_loc =~ /:$/) ? "$dir_loc" : "$dir_loc:";
927 } else {
928 $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
929 $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
930 }
931
932 local ($dir, $name, $fullname, $prune, *DIR);
933
934 unless ($no_chdir) {
935 # untaint the topdir
936 if (( $untaint ) && (is_tainted($dir_loc) )) {
937 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|; # parent dir, now untainted
938 # once untainted, $updir_loc is pushed on the stack (as parent directory);
939 # hence, we don't need to untaint the parent directory every time we chdir
940 # to it later
941 unless (defined $updir_loc) {
942 if ($untaint_skip == 0) {
943 die "directory $dir_loc is still tainted";
944 }
945 else {
946 return;
947 }
948 }
949 }
950 $ok = chdir($updir_loc) unless ($p_dir eq $File::Find::current_dir);
951 unless ($ok) {
952 warnings::warnif "Can't cd to $updir_loc: $!\n";
953 return;
954 }
955 }
956
957 push @Stack,[$dir_loc,$updir_loc,$p_dir,$dir_rel,-1] if $bydepth;
958
959 if ($Is_MacOS) {
960 $p_dir = $dir_pref; # ensure trailing ':'
961 }
962
963 while (defined $SE) {
964
965 unless ($bydepth) {
966 # change (back) to parent directory (always untainted)
967 unless ($no_chdir) {
968 unless (chdir $updir_loc) {
969 warnings::warnif "Can't cd to $updir_loc: $!\n";
970 next;
971 }
972 }
973 $dir= $p_dir; # $File::Find::dir
974 $name= $dir_name; # $File::Find::name
975 $_= ($no_chdir ? $dir_name : $dir_rel ); # $_
976 $fullname= $dir_loc; # $File::Find::fullname
977 # prune may happen here
978 $prune= 0;
979 lstat($_); # make sure file tests with '_' work
980 { &$wanted_callback }; # protect against wild "next"
981 next if $prune;
982 }
983
984 # change to that directory
985 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
986 $updir_loc = $dir_loc;
987 if ( ($untaint) && (($tainted) || ($tainted = is_tainted($dir_loc) )) ) {
988 # untaint $dir_loc, what will be pushed on the stack as (untainted) parent dir
989 ( $updir_loc ) = $dir_loc =~ m|$untaint_pat|;
990 unless (defined $updir_loc) {
991 if ($untaint_skip == 0) {
992 die "directory $dir_loc is still tainted";
993 }
994 else {
995 next;
996 }
997 }
998 }
999 unless (chdir $updir_loc) {
1000 warnings::warnif "Can't cd to $updir_loc: $!\n";
1001 next;
1002 }
1003 }
1004
1005 if ($Is_MacOS) {
1006 $dir_name = "$dir_name:" unless ($dir_name =~ /:$/);
1007 }
1008
1009 $dir = $dir_name; # $File::Find::dir
1010
1011 # Get the list of files in the current directory.
1012 unless (opendir DIR, ($no_chdir ? $dir_loc : $File::Find::current_dir)) {
1013 warnings::warnif "Can't opendir($dir_loc): $!\n";
1014 next;
1015 }
1016 @filenames = readdir DIR;
1017 closedir(DIR);
1018
1019 for my $FN (@filenames) {
1020 next if $FN =~ $File::Find::skip_pattern;
1021
1022 # follow symbolic links / do an lstat
1023 $new_loc = Follow_SymLink($loc_pref.$FN);
1024
1025 # ignore if invalid symlink
1026 next unless defined $new_loc;
1027
1028 if (-d _) {
1029 push @Stack,[$new_loc,$updir_loc,$dir_name,$FN,1];
1030 }
1031 else {
1032 $fullname = $new_loc; # $File::Find::fullname
1033 $name = $dir_pref . $FN; # $File::Find::name
1034 $_ = ($no_chdir ? $name : $FN); # $_
1035 { &$wanted_callback }; # protect against wild "next"
1036 }
1037 }
1038
1039 }
1040 continue {
1041 while (defined($SE = pop @Stack)) {
1042 ($dir_loc, $updir_loc, $p_dir, $dir_rel, $byd_flag) = @$SE;
1043 if ($Is_MacOS) {
1044 # $p_dir always has a trailing ':', except for the starting dir,
1045 # where $dir_rel eq ':'
1046 $dir_name = "$p_dir$dir_rel";
1047 $dir_pref = "$dir_name:";
1048 $loc_pref = ($dir_loc =~ /:$/) ? $dir_loc : "$dir_loc:";
1049 }
1050 else {
1051 $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
1052 $dir_pref = "$dir_name/";
1053 $loc_pref = "$dir_loc/";
1054 }
1055 if ( $byd_flag < 0 ) { # must be finddepth, report dirname now
1056 unless ($no_chdir || ($dir_rel eq $File::Find::current_dir)) {
1057 unless (chdir $updir_loc) { # $updir_loc (parent dir) is always untainted
1058 warnings::warnif "Can't cd to $updir_loc: $!\n";
1059 next;
1060 }
1061 }
1062 $fullname = $dir_loc; # $File::Find::fullname
1063 $name = $dir_name; # $File::Find::name
1064 if ($Is_MacOS) {
1065 if ($dir_rel eq ':') { # must be the top dir, where we started
1066 $name =~ s|:$||; # $File::Find::name
1067 $p_dir = "$p_dir:" unless ($p_dir =~ /:$/);
1068 }
1069 $dir = $p_dir; # $File::Find::dir
1070 $_ = ($no_chdir ? $name : $dir_rel); # $_
1071 }
1072 else {
1073 if ( substr($name,-2) eq '/.' ) {
1074 $name =~ s|/\.$||; # $File::Find::name
1075 }
1076 $dir = $p_dir; # $File::Find::dir
1077 $_ = ($no_chdir ? $dir_name : $dir_rel); # $_
1078 if ( substr($_,-2) eq '/.' ) {
1079 s|/\.$||;
1080 }
1081 }
1082
1083 lstat($_); # make sure file tests with '_' work
1084 { &$wanted_callback }; # protect against wild "next"
1085 }
1086 else {
1087 push @Stack,[$dir_loc, $updir_loc, $p_dir, $dir_rel,-1] if $bydepth;
1088 last;
1089 }
1090 }
1091 }
1092}
1093
1094
1095sub wrap_wanted {
1096 my $wanted = shift;
1097 if ( ref($wanted) eq 'HASH' ) {
1098 if ( $wanted->{follow} || $wanted->{follow_fast}) {
1099 $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
1100 }
1101 if ( $wanted->{untaint} ) {
1102 $wanted->{untaint_pattern} = $File::Find::untaint_pattern
1103 unless defined $wanted->{untaint_pattern};
1104 $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
1105 }
1106 return $wanted;
1107 }
1108 else {
1109 return { wanted => $wanted };
1110 }
1111}
1112
1113sub find {
1114 my $wanted = shift;
1115 _find_opt(wrap_wanted($wanted), @_);
1116}
1117
1118sub finddepth {
1119 my $wanted = wrap_wanted(shift);
1120 $wanted->{bydepth} = 1;
1121 _find_opt($wanted, @_);
1122}
1123
1124# default
1125$File::Find::skip_pattern = qr/^\.{1,2}\z/;
1126$File::Find::untaint_pattern = qr|^([-+@\w./]+)$|;
1127
1128# These are hard-coded for now, but may move to hint files.
1129if ($^O eq 'VMS') {
1130 $Is_VMS = 1;
1131 $File::Find::dont_use_nlink = 1;
1132}
1133elsif ($^O eq 'MacOS') {
1134 $Is_MacOS = 1;
1135 $File::Find::dont_use_nlink = 1;
1136 $File::Find::skip_pattern = qr/^Icon\015\z/;
1137 $File::Find::untaint_pattern = qr|^(.+)$|;
1138}
1139
1140# this _should_ work properly on all platforms
1141# where File::Find can be expected to work
1142$File::Find::current_dir = File::Spec->curdir || '.';
1143
1144$File::Find::dont_use_nlink = 1
1145 if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32' ||
1146 $^O eq 'cygwin' || $^O eq 'epoc' || $^O eq 'NetWare';
1147
1148# Set dont_use_nlink in your hint file if your system's stat doesn't
1149# report the number of links in a directory as an indication
1150# of the number of files.
1151# See, e.g. hints/machten.sh for MachTen 2.2.
1152unless ($File::Find::dont_use_nlink) {
1153 require Config;
1154 $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
1155}
1156
1157# We need a function that checks if a scalar is tainted. Either use the
1158# Scalar::Util module's tainted() function or our (slower) pure Perl
1159# fallback is_tainted_pp()
1160{
1161 local $@;
1162 eval { require Scalar::Util };
1163 *is_tainted = $@ ? \&is_tainted_pp : \&Scalar::Util::tainted;
1164}
1165
11661;