6 require File::Find::Rule;
9 require IO::Socket::SSL;
10 use List::Util qw(sum);
11 require LWP::UserAgent;
13 require Parallel::Fork::BossWorkerAsync;
14 require Term::ProgressBar::Simple;
15 require URI::Find::Simple;
19 while ( my $line = <main::DATA> ) {
21 next if $line =~ /^#/;
26 my $ua = LWP::UserAgent->new(ssl_opts => { verify_hostname => 0 }); # user agent used for the HTTP HEAD checks; TLS hostname verification is switched off
30 my @filenames = @ARGV;
31 @filenames = sort grep { $_ !~ /^\.git/ } File::Find::Rule->new->file->in('.')
34 my $total_bytes = sum 0, map { ( -s $_ ) || 0 } @filenames; # total size drives the extraction progress bar; the leading 0 keeps it defined for an empty list and files without a size count as 0
36 my $extract_progress = Term::ProgressBar::Simple->new(
37 { count => $total_bytes,
38 name => 'Extracting URIs',
43 foreach my $filename (@filenames) {
44 next if $filename =~ /uris\.txt/;
45 next if $filename =~ /check_uris/;
46 next if $filename =~ /\.patch$/;
47 next if $filename =~ 'cpan/Pod-Simple/t/perlfaqo?\.pod';
48 next if $filename =~ /checkURL\.pl$/;
49 my $contents = File::Slurp::read_file($filename);
50 my @uris = URI::Find::Simple::list_uris($contents);
51 foreach my $uri (@uris) {
52 next unless $uri =~ /^(http|ftp)/;
53 next if $ignore{$uri};
55 # no need to hit rt.perl.org
57 if $uri =~ m{^https?://rt\.perl\.org/(?:rt3/)?Ticket/Display\.html\?id=\d+$}; # '.' and '?' are escaped so the literal "Display.html?id=N" ticket URL actually matches
59 # no need to hit rt.cpan.org
61 if $uri =~ m{^https?://rt\.cpan\.org/Public/Bug/Display\.html\?id=\d+$}; # '.' and '?' are escaped so the literal "Display.html?id=N" bug URL actually matches
63 # no need to hit google groups (weird redirect LWP does not like)
65 if $uri =~ m{^http://groups\.google\.com/};
67 push @{ $uris{$uri} }, $filename;
69 $extract_progress += -s $filename;
72 my $bw = Parallel::Fork::BossWorkerAsync->new(
73 work_handler => \&work_alarmed,
74 global_timeout => 120,
78 foreach my $uri ( keys %uris ) {
79 my @filenames = @{ $uris{$uri} };
80 $bw->add_work( { uri => $uri, filenames => \@filenames } );
83 undef $extract_progress;
85 my $fetch_progress = Term::ProgressBar::Simple->new(
86 { count => scalar( keys %uris ),
87 name => 'Fetching URIs',
92 while ( $bw->pending() ) {
93 my $response = $bw->get_result();
94 my $uri = $response->{uri};
95 my @filenames = @{ $response->{filenames} };
96 my $is_success = $response->{is_success};
97 my $message = $response->{message};
99 unless ($is_success) {
100 foreach my $filename (@filenames) {
101 push @{ $filenames{$filename} },
102 { uri => $uri, message => $message };
109 my $fh = IO::File->new( 'uris.txt', 'w' ) or die "Can not write uris.txt: $!\n"; # report file; fail here rather than on the first say() against an undefined handle
110 foreach my $filename ( sort keys %filenames ) {
111 $fh->say("* $filename");
112 my @bits = @{ $filenames{$filename} };
113 foreach my $bit (@bits) {
114 my $uri = $bit->{uri};
115 my $message = $bit->{message};
117 $fh->say(" $message");
122 say 'Finished, see uris.txt';
127 local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required
133 $conf->{is_success} = 0;
134 $conf->{message} = 'Timed out';
142 my $uri = $conf->{uri};
143 my @filenames = @{ $conf->{filenames} };
145 if ( $uri =~ /^http/ ) {
146 my $uri_without_fragment = URI->new($uri); # parse so the fragment can be removed before the request
147 my $fragment = $uri_without_fragment->fragment(undef); # passing undef clears the fragment on the object; per the URI docs the call returns the previous value
148 my $response = $ua->head($uri_without_fragment); # HEAD request against the fragment-less URI
150 $conf->{is_success} = $response->is_success;
151 $conf->{message} = $response->status_line;
155 my $uri_object = URI->new($uri);
156 my $host = $uri_object->host;
157 my $path = $uri_object->path;
158 my ( $volume, $directories, $filename )
159 = File::Spec->splitpath($path);
161 my $ftp = Net::FTP->new( $host, Passive => 1, Timeout => 60 );
163 $conf->{is_success} = 0; # key must be spelled is_success: that is the field the result loop reads from each response
164 $conf->{message} = "Can not connect to $host: $@";
168 my $can_login = $ftp->login( "anonymous", '-anonymous@' );
169 unless ($can_login) {
170 $conf->{is_success} = 0;
171 $conf->{message} = "Can not login " . $ftp->message; # concatenate: with a comma the assignment ends at the string and the server reply is discarded
175 my $can_binary = $ftp->binary();
176 unless ($can_binary) {
177 $conf->{is_success} = 0;
178 $conf->{message} = "Can not binary " . $ftp->message; # concatenate: with a comma the assignment ends at the string and the server reply is discarded
182 my $can_cwd = $ftp->cwd($directories);
184 $conf->{is_success} = 0;
185 $conf->{message} = "Can not cwd to $directories " . $ftp->message; # concatenate: with a comma the assignment ends at the string and the server reply is discarded
190 my $can_size = $ftp->size($filename);
192 $conf->{is_success} = 0;
194 = "Can not size $filename in $directories",
199 my ($can_dir) = $ftp->dir;
201 my ($can_ls) = $ftp->ls;
203 $conf->{is_success} = 0;
205 = "Can not dir or ls in $directories ",
212 $conf->{is_success} = 1;
218 # these are fine but give errors
219 ftp://ftp.stratus.com/pub/vos/posix/ga/ga.html
220 ftp://ftp.stratus.com/pub/vos/utility/utility.html
222 # these are missing, sigh
223 ftp://ftp.sco.com/SLS/ptf7051e.Z
224 http://perlmonks.thepen.com/42898.html
225 http://svn.mutatus.co.uk/wsvn/Scalar-List-Utils/trunk/
226 http://public.activestate.com/cgi-bin/perlbrowse
227 http://svn.mutatus.co.uk/browse/libnet/tags/libnet-1.17/ChangeLog
228 http://aspn.activestate.com/ASPN/Mail/Message/perl6-internals/2746631
229 http://my.smithmicro.com/mac/stuffit/
230 http://www.wg.omron.co.jp/cgi-bin/j-e/jfriedl.html
231 http://persephone.cps.unizar.es/general/gente/spd/gzip/gzip.html
232 http://www.openzaurus.org/
234 http://download.microsoft.com/download/vc15/Patch/1.52/W95/EN-US/nmake15.exe
235 http://www.pvhp.com/~pvhp/
236 http://www.pvhp.com/%7Epvhp/
237 http://www.pvhp.com/%7epvhp/
239 http://www.madgoat.com
240 http://www.mks.com/s390/gnu/
241 http://www.research.att.com/sw/tools/uwin/
243 http://safaribooksonline.com/
244 http://use.perl.org/~autrijus/journal/25768
245 http://www.s390.ibm.com/products/oe/bpxqp1.html
246 http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/1998-02/msg01396.html
247 http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/1998-02/msg01489.html
248 http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/1998-02/msg01491.html
249 http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/1998-02/msg01608.html
250 http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/1998-02/msg02144.html
251 http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/1998-02/msg02998.html
252 http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/1999-03/msg00520.html
253 http://www.w3.org/Security/Faq/
255 # these are URI extraction bugs
256 http://www.perl.org/E
257 http://en.wikipedia.org/wiki/SREC_(file_format
258 http://somewhere.else',-type=/
262 http://search.cpan.org/src/KWILLIAMS/Module-Build-0.30/lib/Module/Build/Platform/Windows.pm:split_like_shell
263 http://www.xray.mpe.mpg.de/mailing-lists/perl5-
264 http://support.microsoft.com/support/kb/articles/Q280/3/41.ASP:
266 http://www.perl.come/
268 # these are used as an example
270 http://something.here/
271 http://users.perl5.git.perl.org/~yourlogin/
272 http://github.com/USERNAME/perl/tree/orange
273 http://the.good.ship.lollypop.com:8080/cgi-bin/foo.cgi
274 http://the.good.ship.lollypop.com:8080/cgi-bin/foo.cgi/somewhere/else?game=chess;game=checkers;weather=dull;foo=bar
275 http://somewhere.else$/
276 http://somewhere.else$/
277 http://somewhere.else/bin/foo&bar',-Type=
278 http://the.good.ship.lollypop.com:8080/cgi-bin/foo.cgi
279 http://the.good.ship.lollypop.com:8080/cgi-bin/foo.cgi/somewhere/else?game=chess;game=checkers;weather=dull;foo=bar
280 http://www.perl.org/test.cgi
282 http://search.cpan.org/perldoc?
284 http://cpan.dev.local/CPAN
288 ftp://ftp.software.ibm.com/aix/fixes/v4/other/vac.C.5.0.2.6.bff
289 http://www14.software.ibm.com/webapp/download/downloadaz.jsp
290 http://www.cpan.org/modules/by-module/Archive/Archive-Zip-*.tar.gz
291 http://www.unicode.org/Public/MAPPINGS/ISO8859/8859-*.TXT
292 http://localhost/tmp/index.txt
293 http://example.com/foo/bar.html
294 http://example.com/Text-Bastardize-1.06.tar.gz
295 ftp://example.com/sources/packages.txt
296 http://example.com/sources/packages.txt
297 http://example.com/sources
298 ftp://example.com/sources
299 http://some.where.com/dir/file.txt
300 http://some.where.com/dir/a.txt
304 http://www.foo.com:8000/
305 http://mysite.com/cgi-bin/log.cgi/http://www.some.other.site/args
306 http://decoded/mirror/path
307 http://a/b/c/d/e/f/g/h/i/j
310 http://purl.org/rss/1.0/modules/taxonomy/
311 ftp://ftp.sun.ac.za/CPAN/CPAN/
312 ftp://ftp.cpan.org/pub/mirror/index.txt
313 ftp://cpan.org/pub/mirror/index.txt
314 http://example.com/~eh/
315 http://plagger.org/.../rss
316 http://umn.dl.sourceforge.net/mingw/gcc-g++-3.4.5-20060117-1.tar.gz
317 http://umn.dl.sourceforge.net/mingw/binutils-2.16.91-20060119-1.tar.gz
318 http://umn.dl.sourceforge.net/mingw/mingw-runtime-3.10.tar.gz
319 http://umn.dl.sourceforge.net/mingw/gcc-core-3.4.5-20060117-1.tar.gz
320 http://umn.dl.sourceforge.net/mingw/w32api-3.6.tar.gz
321 http://search.cpan.org/CPAN/authors/id/S/SH/SHAY/dmake-4.5-20060619-SHAY.zip
322 http://module-build.sourceforge.net/META-spec-new.html
323 http://module-build.sourceforge.net/META-spec-v1.4.html
324 http://www.cs.vu.nl/~tmgil/vi.html
325 http://perlcomposer.sourceforge.net/vperl.html
326 http://www.perl.com/CPAN/authors/Tom_Christiansen/scripts/rep
327 http://w4.lns.cornell.edu/~pvhp/ptk/ptkTOC.html
328 http://world.std.com/~aep/ptkdb/
329 http://www.castlelink.co.uk/object_system/
330 http://www.fh-wedel.de/elvis/
331 ftp://ftp.blarg.net/users/amol/zsh/
332 ftp://ftp.funet.fi/pub/languages/perl/CPAN
333 http://search.cpan.org/CPAN/authors/id/S/SH/SHAY/dmake-4.5-20060619-SHAY.zip
334 http://users.perl5.git.perl.org/~USERNAME
335 http://foo/x//y/script.cgi/a//b
336 http://xxx/script.cgi/http://foo
337 http://foo/./x//z/script.cgi/a/../b//c
338 http://somewhere.else/in/movie/land
339 http://somewhere.else/finished.html
340 http://somewhere.else/bin/foo&bar$
341 http://somewhere.else/
344 http://myrepo.example.com/
347 http://example.com:1024/
351 http://example.com:9000/index.html
352 http://proxy.example.com:8080/
354 http://[www.json::pp.org]/
356 http://foo.example.com/
358 http://whatever/man/1/crontab
360 http://whatever/Foo%3A%3ABar
362 http://remote.server.com/jquery.css
363 http://some.other.com/page.html
366 http://link.included.here?o=1&p=2
367 http://link.included.here?o=1&p=2
368 http://link.included.here?o=1&p=2
369 http://link.included.here/
370 http://foo/x//y/script.cgi/a//b
371 http://xxx/script.cgi/http://foo
372 http://foo/./x//z/script.cgi/a/../b//c
373 http://somewhere.else/in/movie/land
374 http://somewhere.else/finished.html
375 http://webproxy:3128/
378 # these are used to generate or match URLs
379 http://www.cpan.org/modules/by-authors/id/$1/$1$2/$dist
380 http://www.cpantesters.org/show/%s.yaml
388 http://$addr/mark?commit=$
389 http://search.cpan.org/~
392 http://www.ietf.org/rfc/rfc$2.txt
393 http://search.cpan.org/~
396 # weird redirects that LWP doesn't like
397 http://www.theperlreview.com/community_calendar
398 http://www.software.hp.com/portal/swdepot/displayProductInfo.do?productNumber=PERL
399 http://sunsolve.sun.com
401 # broken webserver that doesn't like HEAD requests
402 http://docs.sun.com/app/docs/doc/816-5168/syslog-3c?a=view
403 http://www.w3.org/TR/html4/loose.dtd
405 # these have been reported upstream to CPAN authors
406 http://www.gnu.org/manual/tar/html_node/tar_139.html
407 http://www.w3.org/pub/WWW/TR/Wd-css-1.html
408 http://support.microsoft.com/support/kb/articles/Q280/3/41.ASP
409 http://msdn.microsoft.com/workshop/author/dhtml/httponly_cookies.asp
410 http://search.cpan.org/search?query=Module::Build::Convert
411 http://www.refcnt.org/papers/module-build-convert
412 http://csrc.nist.gov/cryptval/shs.html
413 http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset4.asp
414 http://www.debian.or.jp/~kubota/unicode-symbols.html.en
415 http://www.mail-archive.com/perl5-porters@perl.org/msg69766.html
416 http://www.debian.or.jp/~kubota/unicode-symbols.html.en
417 http://rfc.net/rfc2781.html
418 http://www.icu-project.org/charset/
419 http://ppewww.ph.gla.ac.uk/~flavell/charset/form-i18n.html
420 http://www.rfc-editor.org/
422 http://www.oreilly.com/people/authors/lunde/cjk_inf.html
423 http://www.oreilly.com/catalog/cjkvinfo/
424 http://www.cpan.org/modules/by-author/Damian_Conway/Filter-Simple.tar.gz
425 http://www.csse.monash.edu.au/~damian/CPAN/Filter-Simple.tar.gz
426 http://www.egt.ie/standards/iso3166/iso3166-1-en.html
427 http://www.bsi-global.com/iso4217currency
428 http://www.plover.com/~mjd/perl/Memoize/
429 http://www.plover.com/~mjd/perl/MiniMemoize/
430 http://www.sysadminmag.com/tpj/issues/vol5_5/
431 ftp://ftp.tpc.int/tpc/server/UNIX/
432 http://www.nara.gov/genealogy/
433 http://home.utah-inter.net/kinsearch/Soundex.html
434 http://www.nara.gov/genealogy/soundex/soundex.html
435 http://rfc.net/rfc3461.html
436 ftp://ftp.cs.pdx.edu/pub/elvis/
437 http://www.fh-wedel.de/elvis/
438 http://lists.perl.org/list/perl-mvs.html
439 http://www.cpan.org/ports/os2/
440 http://github.com/dagolden/cpan-meta-spec
441 http://github.com/dagolden/cpan-meta-spec/issues
442 http://www.opensource.org/licenses/lgpl-license.phpt
443 http://reality.sgi.com/ariel
444 http://www.chiark.greenend.org.uk/pipermail/ukcrypto/1999-February/003538.html
445 http://www.chiark.greenend.org.uk/pipermail/ukcrypto/1999-February/003538.html
446 http://www.nsrl.nist.gov/testdata/
447 http://public.activestate.com/cgi-bin/perlbrowse/p/31194
448 http://public.activestate.com/cgi-bin/perlbrowse?patch=16173
449 http://public.activestate.com/cgi-bin/perlbrowse?patch=16049
450 http://www.li18nux.org/docs/html/CodesetAliasTable-V10.html
451 http://aspn.activestate.com/ASPN/Mail/Message/perl5-porters/3486118
452 http://lxr.mozilla.org/seamonkey/source/intl/uconv/ucvlatin/vps.ut
453 http://lxr.mozilla.org/seamonkey/source/intl/uconv/ucvlatin/vps.uf
454 http://github.com/schwern/extutils-makemaker
455 https://github.com/dagolden/cpanpm/compare/master...private%2Fuse-http-lite
456 http://www.json.org/JSON::PP_checker/
457 ftp://ftp.kiae.su/pub/unix/fido/
458 http://www.gallistel.net/nparker/weather/code/
459 http://www.javaworld.com/javaworld/jw-09-2001/jw-0928-ntmessages.html
460 ftp://ftp-usa.tpc.int/pub/tpc/server/UNIX/
461 http://www.cis.ohio-state.edu/htbin/rfc/rfc959.html
462 http://public.activestate.com/cgi-bin/perlbrowse/p/33567
463 http://public.activestate.com/cgi-bin/perlbrowse/p/33566
464 http://www.dsmit.com/cons/
465 http://www.makemaker.org/wiki/index.cgi?ModuleBuildConversionGuide
471 checkURL.pl - Check that all the URLs in the Perl source are valid
475 This program checks that all the URLs in the Perl source are valid. It
476 checks HTTP and FTP links in parallel and contains a list of known
477 bad example links in its source. It takes 4 minutes to run on my
478 machine. The results are written to 'uris.txt' and list the filename,
479 the URL and the error:
481 * ext/Locale-Maketext/lib/Locale/Maketext.pod
482 http://sunsite.dk/RFC/rfc/rfc2277.html
486 It should be run every so often and links fixed and upstream authors
489 Note that the web is unstable and some websites are temporarily down.