Commit | Line | Data |
---|---|---|
6e7dc4a9 JH |
1 | #!/usr/bin/perl |
2 | ||
3 | use strict; | |
4 | use warnings 'all'; | |
5 | ||
6 | use LWP::Simple qw /$ua getstore/; | |
6e7dc4a9 | 7 | |
6e7dc4a9 JH |
8 | my %urls; |
9 | ||
10 | my @dummy = qw( | |
11 | http://something.here | |
12 | http://www.pvhp.com | |
13 | ); | |
14 | my %dummy; | |
15 | ||
16 | @dummy{@dummy} = (); | |
17 | ||
058eaa42 | 18 | foreach my $file (<*/*.pod */*/*.pod */*/*/*.pod README README.* INSTALL>) { |
6e7dc4a9 JH |
19 | open my $fh => $file or die "Failed to open $file: $!\n"; |
20 | while (<$fh>) { | |
21 | if (m{(?:http|ftp)://(?:(?!\w<)[-\w~?@=.])+} && !exists $dummy{$&}) { | |
22 | my $url = $&; | |
23 | $url =~ s/\.$//; | |
24 | $urls {$url} ||= { }; | |
25 | $urls {$url} {$file} = 1; | |
26 | } | |
27 | } | |
28 | close $fh; | |
29 | } | |
30 | ||
0d6d7233 JH |
31 | sub fisher_yates_shuffle { |
32 | my $deck = shift; # $deck is a reference to an array | |
33 | my $i = @$deck; | |
34 | while (--$i) { | |
35 | my $j = int rand ($i+1); | |
36 | @$deck[$i,$j] = @$deck[$j,$i]; | |
37 | } | |
38 | } | |
39 | ||
6e7dc4a9 JH |
40 | my @urls = keys %urls; |
41 | ||
0d6d7233 JH |
42 | fisher_yates_shuffle(\@urls); |
43 | ||
44 | sub todo { | |
45 | warn "(", scalar @urls, " URLs)\n"; | |
46 | } | |
47 | ||
48 | my $MAXPROC = 40; | |
49 | my $MAXURL = 10; | |
50 | my $MAXFORK = $MAXPROC < $MAXURL ? 1 : $MAXPROC / $MAXURL; | |
51 | ||
52 | select(STDERR); $| = 1; | |
53 | select(STDOUT); $| = 1; | |
54 | ||
6e7dc4a9 | 55 | while (@urls) { |
0d6d7233 | 56 | my @list; |
6e7dc4a9 | 57 | my $pid; |
0d6d7233 JH |
58 | my $i; |
59 | ||
60 | todo(); | |
61 | ||
62 | for ($i = 0; $i < $MAXFORK; $i++) { | |
63 | $list[$i] = [ splice @urls, 0, $MAXURL ]; | |
6e7dc4a9 | 64 | $pid = fork; |
0d6d7233 JH |
65 | die "Failed to fork: $!\n" unless defined $pid; |
66 | last unless $pid; # Child. | |
67 | } | |
68 | ||
69 | if ($pid) { | |
70 | # Parent. | |
71 | warn "(waiting)\n"; | |
72 | 1 until -1 == wait; # Reap. | |
73 | } else { | |
6e7dc4a9 | 74 | # Child. |
0d6d7233 | 75 | foreach my $url (@{$list[$i]}) { |
6e7dc4a9 JH |
76 | my $code = getstore $url, "/dev/null"; |
77 | next if $code == 200; | |
78 | my $f = join ", " => keys %{$urls {$url}}; | |
79 | printf "%03d %s: %s\n" => $code, $url, $f; | |
80 | } | |
81 | ||
82 | exit; | |
83 | } | |
84 | } | |
85 | ||
6e7dc4a9 | 86 | __END__ |