| 1 | #!/usr/bin/perl |
| 2 | |
| 3 | use strict; |
| 4 | use warnings 'all'; |
| 5 | |
| 6 | use LWP::Simple qw /$ua getstore/; |
| 7 | |
| 8 | my %urls; |
| 9 | |
| 10 | my @dummy = qw( |
| 11 | http://something.here |
| 12 | http://www.pvhp.com |
| 13 | ); |
| 14 | my %dummy; |
| 15 | |
| 16 | @dummy{@dummy} = (); |
| 17 | |
| 18 | foreach my $file (<*/*.pod */*/*.pod */*/*/*.pod README README.* INSTALL>) { |
| 19 | open my $fh => $file or die "Failed to open $file: $!\n"; |
| 20 | while (<$fh>) { |
| 21 | if (m{(?:http|ftp)://(?:(?!\w<)[-\w~?@=.])+} && !exists $dummy{$&}) { |
| 22 | my $url = $&; |
| 23 | $url =~ s/\.$//; |
| 24 | $urls {$url} ||= { }; |
| 25 | $urls {$url} {$file} = 1; |
| 26 | } |
| 27 | } |
| 28 | close $fh; |
| 29 | } |
| 30 | |
| 31 | sub fisher_yates_shuffle { |
| 32 | my $deck = shift; # $deck is a reference to an array |
| 33 | my $i = @$deck; |
| 34 | while (--$i) { |
| 35 | my $j = int rand ($i+1); |
| 36 | @$deck[$i,$j] = @$deck[$j,$i]; |
| 37 | } |
| 38 | } |
| 39 | |
| 40 | my @urls = keys %urls; |
| 41 | |
| 42 | fisher_yates_shuffle(\@urls); |
| 43 | |
| 44 | sub todo { |
| 45 | warn "(", scalar @urls, " URLs)\n"; |
| 46 | } |
| 47 | |
| 48 | my $MAXPROC = 40; |
| 49 | my $MAXURL = 10; |
| 50 | my $MAXFORK = $MAXPROC < $MAXURL ? 1 : $MAXPROC / $MAXURL; |
| 51 | |
| 52 | select(STDERR); $| = 1; |
| 53 | select(STDOUT); $| = 1; |
| 54 | |
| 55 | while (@urls) { |
| 56 | my @list; |
| 57 | my $pid; |
| 58 | my $i; |
| 59 | |
| 60 | todo(); |
| 61 | |
| 62 | for ($i = 0; $i < $MAXFORK; $i++) { |
| 63 | $list[$i] = [ splice @urls, 0, $MAXURL ]; |
| 64 | $pid = fork; |
| 65 | die "Failed to fork: $!\n" unless defined $pid; |
| 66 | last unless $pid; # Child. |
| 67 | } |
| 68 | |
| 69 | if ($pid) { |
| 70 | # Parent. |
| 71 | warn "(waiting)\n"; |
| 72 | 1 until -1 == wait; # Reap. |
| 73 | } else { |
| 74 | # Child. |
| 75 | foreach my $url (@{$list[$i]}) { |
| 76 | my $code = getstore $url, "/dev/null"; |
| 77 | next if $code == 200; |
| 78 | my $f = join ", " => keys %{$urls {$url}}; |
| 79 | printf "%03d %s: %s\n" => $code, $url, $f; |
| 80 | } |
| 81 | |
| 82 | exit; |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | __END__ |