9 use File::Basename qw[dirname];
13 use IPC::Cmd qw[can_run run QUOTE];
14 use File::Path qw[mkpath];
15 use File::Temp qw[tempdir];
16 use Params::Check qw[check];
17 use Module::Load::Conditional qw[can_load];
18 use Locale::Maketext::Simple Style => 'gettext';
20 use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
21 $BLACKLIST $METHOD_FAIL $VERSION $METHODS
22 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN $FORCEIPV4
26 $VERSION = eval $VERSION; # avoid warnings with development releases
27 $PREFER_BIN = 0; # XXX TODO implement
28 $FROM_EMAIL = 'File-Fetch@example.com';
29 $USER_AGENT = "File::Fetch/$VERSION";
30 $BLACKLIST = [qw|ftp|];
31 push @$BLACKLIST, qw|lftp| if $^O eq 'dragonfly' || $^O eq 'hpux';
39 ### methods available to fetch the file depending on the scheme
41 http => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ],
42 https => [ qw|lwp wget curl| ],
43 ftp => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ],
44 file => [ qw|lwp lftp file| ],
45 rsync => [ qw|rsync| ],
49 ### silly warnings ###
50 local $Params::Check::VERBOSE = 1;
51 local $Params::Check::VERBOSE = 1;
52 local $Module::Load::Conditional::VERBOSE = 0;
53 local $Module::Load::Conditional::VERBOSE = 0;
55 ### see what OS we are on, important for file:// uris ###
56 use constant ON_WIN => ($^O eq 'MSWin32');
57 use constant ON_VMS => ($^O eq 'VMS');
58 use constant ON_UNIX => (!ON_WIN);
59 use constant HAS_VOL => (ON_WIN);
60 use constant HAS_SHARE => (ON_WIN);
61 use constant HAS_FETCH => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! );
67 File::Fetch - A generic file fetching mechanism
73 ### build a File::Fetch object ###
74 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
76 ### fetch the uri to cwd() ###
77 my $where = $ff->fetch() or die $ff->error;
79 ### fetch the uri to /tmp ###
80 my $where = $ff->fetch( to => '/tmp' );
82 ### parsed bits from the uri ###
91 File::Fetch is a generic file fetching mechanism.
93 It allows you to fetch any file pointed to by a C<ftp>, C<http>,
94 C<file>, C<git> or C<rsync> uri by a number of different means.
96 See the C<HOW IT WORKS> section further down for details.
100 A C<File::Fetch> object has the following accessors
106 The uri you passed to the constructor
110 The scheme from the uri (like 'file', 'http', etc)
114 The hostname in the uri. Will be empty if host was originally
115 'localhost' for a 'file://' url.
119 On operating systems with the concept of a volume the second element
120 of a file:// is considered to the be volume specification for the file.
121 Thus on Win32 this routine returns the volume, on other operating
122 systems this returns nothing.
124 On Windows this value may be empty if the uri is to a network share, in
125 which case the 'share' property will be defined. Additionally, volume
126 specifications that use '|' as ':' will be converted on read to use ':'.
128 On VMS, which has a volume concept, this field will be empty because VMS
129 file specifications are converted to absolute UNIX format and the volume
130 information is transparently included.
134 On systems with the concept of a network share (currently only Windows) returns
135 the sharename from a file://// url. On other operating systems returns empty.
139 The path from the uri, will be at least a single '/'.
143 The name of the remote file. For the local file name, the
144 result of $ff->output_file will be used.
146 =item $ff->file_default
148 The name of the default local file, that $ff->output_file falls back to if
149 it would otherwise return no filename. For example when fetching a URI like
150 http://www.abc.net.au/ the contents retrieved may be from a remote file called
151 'index.html'. The default value of this attribute is literally 'file_default'.
156 ##########################
157 ### Object & Accessors ###
158 ##########################
161 ### template for autogenerated accessors ###
163 scheme => { default => 'http' },
164 host => { default => 'localhost' },
165 path => { default => '/' },
166 file => { required => 1 },
167 uri => { required => 1 },
168 userinfo => { default => '' },
169 vol => { default => '' }, # windows for file:// uris
170 share => { default => '' }, # windows for file:// uris
171 file_default => { default => 'file_default' },
172 tempdir_root => { required => 1 }, # Should be lazy-set at ->new()
173 _error_msg => { no_override => 1 },
174 _error_msg_long => { no_override => 1 },
177 for my $method ( keys %$Tmpl ) {
181 $self->{$method} = $_[0] if @_;
182 return $self->{$method};
190 my $args = check( $Tmpl, \%hash ) or return;
194 if( lc($args->scheme) ne 'file' and not $args->host ) {
195 return $class->_error(loc(
196 "Hostname required when fetching from '%1'",$args->scheme));
200 unless( $args->$_() ) { # 5.5.x needs the ()
201 return $class->_error(loc("No '%1' specified",$_));
209 =item $ff->output_file
211 The name of the output file. This is the same as $ff->file,
212 but any query parameters are stripped off. For example:
214 http://example.com/index.html?x=y
216 would make the output file be C<index.html> rather than
225 my $file = $self->file;
229 $file ||= $self->file_default;
234 ### XXX do this or just point to URI::Escape?
235 # =head2 $esc_uri = $ff->escaped_uri
239 # ### most of this is stolen straight from URI::escape
240 # { ### Build a char->hex map
241 # my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
245 # my $uri = $self->uri;
247 # ### Default unsafe characters. RFC 2732 ^(uric - reserved)
248 # $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
249 # $escapes{$1} || $self->_fail_hi($1)/ge;
259 # "Can't escape '%1', try using the '%2' module instead",
260 # sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
273 =head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
275 Parses the uri and creates a corresponding File::Fetch::Item object,
276 that is ready to be C<fetch>ed and returns it.
278 Returns false on failure.
286 my ($uri, $file_default, $tempdir_root);
288 uri => { required => 1, store => \$uri },
289 file_default => { required => 0, store => \$file_default },
290 tempdir_root => { required => 0, store => \$tempdir_root },
293 check( $tmpl, \%hash ) or return;
295 ### parse the uri to usable parts ###
296 my $href = $class->_parse_uri( $uri ) or return;
298 $href->{file_default} = $file_default if $file_default;
299 $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root;
300 $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd ) if not $href->{tempdir_root};
302 ### make it into a FFI object ###
303 my $ff = $class->_create( %$href ) or return;
306 ### return the object ###
310 ### parses an uri to a hash structure:
312 ### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
318 ### host => 'ftp.cpan.org',
319 ### path => '/pub/mirror',
320 ### file => 'index.html'
323 ### In the case of file:// urls there maybe be additional fields
325 ### For systems with volume specifications such as Win32 there will be
326 ### a volume specifier provided in the 'vol' field.
328 ### 'vol' => 'volumename'
330 ### For windows file shares there may be a 'share' key specified
332 ### 'share' => 'sharename'
334 ### Note that the rules of what a file:// url means vary by the operating system
335 ### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
336 ### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
337 ### not '/foo/bar.txt'
339 ### Similarly if the host interpreting the url is VMS then
340 ### file:///disk$user/my/notes/note12345.txt' means
341 ### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
342 ### if it is unix where it means /disk$user/my/notes/note12345.txt'.
343 ### Except for some cases in the File::Spec methods, Perl on VMS will generally
344 ### handle UNIX format file specifications.
346 ### This means it is impossible to serve certain file:// urls on certain systems.
348 ### Thus are the problems with a protocol-less specification. :-(
353 my $uri = shift or return;
355 my $href = { uri => $uri };
357 ### find the scheme ###
358 $uri =~ s|^(\w+)://||;
359 $href->{scheme} = $1;
361 ### See rfc 1738 section 3.10
362 ### https://datatracker.ietf.org/doc/html/rfc1738#section-3.10
363 ### And wikipedia for more on windows file:// urls
364 ### http://en.wikipedia.org/wiki/File://
365 if( $href->{scheme} eq 'file' ) {
367 my @parts = split '/',$uri;
369 ### file://hostname/...
370 ### file://hostname/...
371 ### normalize file://localhost with file:///
372 $href->{host} = $parts[0] || '';
374 ### index in @parts where the path components begin;
377 ### file:////hostname/sharename/blah.txt
378 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
380 $href->{host} = $parts[2] || ''; # avoid warnings
381 $href->{share} = $parts[3] || ''; # avoid warnings
383 $index = 4 # index after the share
385 ### file:///D|/blah.txt
386 ### file:///D:/blah.txt
389 ### this code comes from dmq's patch, but:
390 ### XXX if volume is empty, wouldn't that be an error? --kane
391 ### if so, our file://localhost test needs to be fixed as wel
392 $href->{vol} = $parts[1] || '';
394 ### correct D| style colume descriptors
395 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
397 $index = 2; # index after the volume
400 ### rebuild the path from the leftover parts;
401 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
404 ### using anything but qw() in hash slices may produce warnings
405 ### in older perls :-(
406 @{$href}{ qw(userinfo host path) } = $uri =~ m|(?:([^\@:]*:[^\:\@]*)@)?([^/]*)(/.*)$|s;
409 ### split the path into file + dir ###
410 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
411 $href->{path} = $parts[1];
412 $href->{file} = $parts[2];
415 ### host will be empty if the target was 'localhost' and the
416 ### scheme was 'file'
417 $href->{host} = '' if ($href->{host} eq 'localhost') and
418 ($href->{scheme} eq 'file');
423 =head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
425 Fetches the file you requested and returns the full path to the file.
427 By default it writes to C<cwd()>, but you can override that by specifying
430 ### file fetch to /tmp, full path to the file in $where
431 $where = $ff->fetch( to => '/tmp' );
433 ### file slurped into $scalar, full path to the file in $where
434 ### file is downloaded to a temp directory and cleaned up at exit time
435 $where = $ff->fetch( to => \$scalar );
437 Returns the full path to the downloaded file on success, and false
443 my $self = shift or return;
448 to => { default => cwd(), store => \$target },
451 check( $tmpl, \%hash ) or return;
454 ### you want us to slurp the contents
455 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
456 $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 );
462 ### On VMS force to VMS format so File::Spec will work.
463 $to = VMS::Filespec::vmspath($to) if ON_VMS;
465 ### create the path if it doesn't exist yet ###
467 eval { mkpath( $to ) };
469 return $self->_error(loc("Could not create path '%1'",$to)) if $@;
473 ### set passive ftp if required ###
474 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
476 ### we dont use catfile on win32 because if we are using a cygwin tool
477 ### under cmd.exe they wont understand windows style separators.
478 my $out_to = ON_WIN ? $to.'/'.$self->output_file
479 : File::Spec->catfile( $to, $self->output_file );
481 for my $method ( @{ $METHODS->{$self->scheme} } ) {
482 my $sub = '_'.$method.'_fetch';
484 unless( __PACKAGE__->can($sub) ) {
485 $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
490 ### method is blacklisted ###
491 next if grep { lc $_ eq $method } @$BLACKLIST;
493 ### method is known to fail ###
494 next if $METHOD_FAIL->{$method};
496 ### there's serious issues with IPC::Run and quoting of command
497 ### line arguments. using quotes in the wrong place breaks things,
498 ### and in the case of say,
499 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
500 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
501 ### it doesn't matter how you quote, it always fails.
502 local $IPC::Cmd::USE_IPC_RUN = 0;
504 if( my $file = $self->$sub(
508 unless( -e $file && -s _ ) {
509 $self->_error(loc("'%1' said it fetched '%2', ".
510 "but it was not created",$method,$file));
512 ### mark the failure ###
513 $METHOD_FAIL->{$method} = 1;
520 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
523 open my $fh, "<$file" or do {
525 loc("Could not open '%1': %2", $file, $!));
530 $$target = do { local $/; <$fh> };
534 my $abs = File::Spec->rel2abs( $file );
542 ### if we got here, we looped over all methods, but we weren't able
547 ########################
548 ### _*_fetch methods ###
549 ########################
558 to => { required => 1, store => \$to }
560 check( $tmpl, \%hash ) or return;
562 ### modules required to download with lwp ###
565 'LWP::UserAgent' => '0.0',
566 'HTTP::Request' => '0.0',
567 'HTTP::Status' => '0.0',
572 if ($self->scheme eq 'https') {
573 $use_list->{'LWP::Protocol::https'} = '0';
576 ### Fix CVE-2016-1238 ###
577 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
578 unless( can_load( modules => $use_list ) ) {
579 $METHOD_FAIL->{'lwp'} = 1;
583 ### setup the uri object
584 my $uri = URI->new( File::Spec::Unix->catfile(
585 $self->path, $self->file
588 ### special rules apply for file:// uris ###
589 $uri->scheme( $self->scheme );
590 $uri->host( $self->scheme eq 'file' ? '' : $self->host );
592 if ($self->userinfo) {
593 $uri->userinfo($self->userinfo);
594 } elsif ($self->scheme ne 'file') {
595 $uri->userinfo("anonymous:$FROM_EMAIL");
598 ### set up the useragent object
599 my $ua = LWP::UserAgent->new();
600 $ua->timeout( $TIMEOUT ) if $TIMEOUT;
601 $ua->agent( $USER_AGENT );
602 $ua->from( $FROM_EMAIL );
605 my $res = $ua->mirror($uri, $to) or return;
607 ### uptodate or fetched ok ###
608 if ( $res->code == 304 or $res->code == 200 ) {
612 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
613 $res->code, HTTP::Status::status_message($res->code),
619 ### HTTP::Tiny fetching ###
620 sub _httptiny_fetch {
626 to => { required => 1, store => \$to }
628 check( $tmpl, \%hash ) or return;
631 'HTTP::Tiny' => '0.008',
635 ### Fix CVE-2016-1238 ###
636 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
637 unless( can_load(modules => $use_list) ) {
638 $METHOD_FAIL->{'httptiny'} = 1;
642 my $uri = $self->uri;
644 my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) );
646 my $rc = $http->mirror( $uri, $to );
648 unless ( $rc->{success} ) {
650 return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]",
651 $rc->{status}, $rc->{reason} ) );
659 ### HTTP::Lite fetching ###
660 sub _httplite_fetch {
666 to => { required => 1, store => \$to }
668 check( $tmpl, \%hash ) or return;
670 ### modules required to download with lwp ###
672 'HTTP::Lite' => '2.2',
673 'MIME::Base64' => '0',
676 ### Fix CVE-2016-1238 ###
677 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
678 unless( can_load(modules => $use_list) ) {
679 $METHOD_FAIL->{'httplite'} = 1;
683 my $uri = $self->uri;
686 RETRIES: while ( $retries++ < 5 ) {
688 my $http = HTTP::Lite->new();
689 # Naughty naughty but there isn't any accessor/setter
690 $http->{timeout} = $TIMEOUT if $TIMEOUT;
691 $http->http11_mode(1);
693 if ($self->userinfo) {
694 my $encoded = MIME::Base64::encode($self->userinfo, '');
695 $http->add_req_header("Authorization", "Basic $encoded");
698 my $fh = FileHandle->new;
700 unless ( $fh->open($to,'>') ) {
701 return $self->_error(loc(
702 "Could not open '%1' for writing: %2",$to,$!));
709 my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh );
713 if ( $rc == 301 || $rc == 302 ) {
715 HEADERS: for ($http->headers_array) {
716 /Location: (\S+)/ and $loc = $1, last HEADERS;
718 #$loc or last; # Think we should squeal here.
720 $uri =~ s{^(\w+?://[^/]+)/.*$}{$1};
728 elsif ( $rc == 200 ) {
732 return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]",
733 $rc, $http->status_message));
736 } # Loop for 5 retries.
738 return $self->_error("Fetch failed! Gave up after 5 tries");
742 ### Simple IO::Socket::INET fetching ###
749 to => { required => 1, store => \$to }
751 check( $tmpl, \%hash ) or return;
754 'IO::Socket::INET' => '0.0',
755 'IO::Select' => '0.0',
758 ### Fix CVE-2016-1238 ###
759 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
760 unless( can_load(modules => $use_list) ) {
761 $METHOD_FAIL->{'iosock'} = 1;
765 my $sock = IO::Socket::INET->new(
766 PeerHost => $self->host,
767 ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ),
771 return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!));
774 my $fh = FileHandle->new;
778 unless ( $fh->open($to,'>') ) {
779 return $self->_error(loc(
780 "Could not open '%1' for writing: %2",$to,$!));
786 my $path = File::Spec::Unix->catfile( $self->path, $self->file );
787 my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a";
790 my $select = IO::Select->new( $sock );
794 while ( $select->can_read( $TIMEOUT || 60 ) ) {
795 my $ret = $sock->sysread( $resp, 4096, length($resp) );
796 if ( !defined $ret or $ret == 0 ) {
797 $select->remove( $sock );
804 return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 )));
807 # Check the "response"
808 # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1)
809 $resp =~ s/^(\x0d?\x0a)+//;
810 # Check it is an HTTP response
811 unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) {
812 return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host));
816 my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i;
817 unless ( $code eq '200' ) {
818 return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host));
823 print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0];
829 ### Net::FTP fetching
836 to => { required => 1, store => \$to }
838 check( $tmpl, \%hash ) or return;
840 ### required modules ###
841 my $use_list = { 'Net::FTP' => 0 };
843 ### Fix CVE-2016-1238 ###
844 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
845 unless( can_load( modules => $use_list ) ) {
846 $METHOD_FAIL->{'netftp'} = 1;
850 ### make connection ###
852 my @options = ($self->host);
853 push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
854 unless( $ftp = Net::FTP->new( @options ) ) {
855 return $self->_error(loc("Ftp creation failed: %1",$@));
859 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
860 return $self->_error(loc("Could not login to '%1'",$self->host));
863 ### set binary mode, just in case ###
866 ### create the remote path
867 ### remember remote paths are unix paths! [#11483]
868 my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
870 ### fetch the file ###
872 unless( $target = $ftp->get( $remote, $to ) ) {
873 return $self->_error(loc("Could not fetch '%1' from '%2'",
874 $remote, $self->host));
884 ### /bin/wget fetch ###
891 to => { required => 1, store => \$to }
893 check( $tmpl, \%hash ) or return;
896 ### see if we have a wget binary ###
897 unless( $wget = can_run('wget') ) {
898 $METHOD_FAIL->{'wget'} = 1;
902 ### no verboseness, thanks ###
903 my $cmd = [ $wget, '--quiet' ];
905 ### if a timeout is set, add it ###
906 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
908 ### run passive if specified ###
909 push @$cmd, '--passive-ftp' if $self->scheme eq 'ftp' && $FTP_PASSIVE;
911 ### set the output document, add the uri ###
912 push @$cmd, '--output-document', $to, $self->uri;
914 ### with IPC::Cmd > 0.41, this is fixed in teh library,
915 ### and there's no need for special casing any more.
916 ### DO NOT quote things for IPC::Run, it breaks stuff.
917 # $IPC::Cmd::USE_IPC_RUN
918 # ? ($to, $self->uri)
919 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
923 unless(run( command => $cmd,
924 buffer => \$captured,
927 ### wget creates the output document always, even if the fetch
928 ### fails.. so unlink it in that case
931 return $self->_error(loc( "Command failed: %1", $captured || '' ));
937 ### /bin/lftp fetch ###
944 to => { required => 1, store => \$to }
946 check( $tmpl, \%hash ) or return;
948 ### see if we have a lftp binary ###
950 unless( $lftp = can_run('lftp') ) {
951 $METHOD_FAIL->{'lftp'} = 1;
955 ### no verboseness, thanks ###
956 my $cmd = [ $lftp, '-f' ];
958 my $fh = File::Temp->new;
962 ### if a timeout is set, add it ###
963 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
965 ### run passive if specified ###
966 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
968 ### set the output document, add the uri ###
969 ### quote the URI, because lftp supports certain shell
970 ### expansions, most notably & for backgrounding.
971 ### ' quote does nto work, must be "
972 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
975 my $pp_str = join ' ', split $/, $str;
976 print "# lftp command: $pp_str\n";
979 ### write straight to the file.
983 ### the command needs to be 1 string to be executed
984 push @$cmd, $fh->filename;
986 ### with IPC::Cmd > 0.41, this is fixed in teh library,
987 ### and there's no need for special casing any more.
988 ### DO NOT quote things for IPC::Run, it breaks stuff.
989 # $IPC::Cmd::USE_IPC_RUN
990 # ? ($to, $self->uri)
991 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
996 unless(run( command => $cmd,
997 buffer => \$captured,
1000 ### wget creates the output document always, even if the fetch
1001 ### fails.. so unlink it in that case
1004 return $self->_error(loc( "Command failed: %1", $captured || '' ));
1012 ### /bin/ftp fetch ###
1019 to => { required => 1, store => \$to }
1021 check( $tmpl, \%hash ) or return;
1023 ### see if we have a ftp binary ###
1025 unless( $ftp = can_run('ftp') ) {
1026 $METHOD_FAIL->{'ftp'} = 1;
1030 my $fh = FileHandle->new;
1032 local $SIG{CHLD} = 'IGNORE';
1034 unless ($fh->open("$ftp -n", '|-')) {
1035 return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
1039 "lcd " . dirname($to),
1040 "open " . $self->host,
1041 "user anonymous $FROM_EMAIL",
1043 "cd " . $self->path,
1045 "get " . $self->file . " " . $self->output_file,
1049 foreach (@dialog) { $fh->print($_, "\n") }
1050 $fh->close or return;
1055 ### lynx is stupid - it decompresses any .gz file it finds to be text
1056 ### use /bin/lynx to fetch files
1063 to => { required => 1, store => \$to }
1065 check( $tmpl, \%hash ) or return;
1067 ### see if we have a lynx binary ###
1069 unless ( $lynx = can_run('lynx') ){
1070 $METHOD_FAIL->{'lynx'} = 1;
1074 unless( IPC::Cmd->can_capture_buffer ) {
1075 $METHOD_FAIL->{'lynx'} = 1;
1077 return $self->_error(loc(
1078 "Can not capture buffers. Can not use '%1' to fetch files",
1082 ### check if the HTTP resource exists ###
1083 if ($self->uri =~ /^https?:\/\//i) {
1088 "-auth=anonymous:$FROM_EMAIL",
1091 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1093 push @$cmd, $self->uri;
1097 unless(run( command => $cmd,
1101 return $self->_error(loc("Command failed: %1", $head || ''));
1104 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
1105 return $self->_error(loc("Command failed: %1", $head || ''));
1109 ### write to the output file ourselves, since lynx ass_u_mes to much
1110 my $local = FileHandle->new( $to, 'w' )
1111 or return $self->_error(loc(
1112 "Could not open '%1' for writing: %2",$to,$!));
1114 ### dump to stdout ###
1118 "-auth=anonymous:$FROM_EMAIL",
1121 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1123 ### DO NOT quote things for IPC::Run, it breaks stuff.
1124 push @$cmd, $self->uri;
1126 ### with IPC::Cmd > 0.41, this is fixed in teh library,
1127 ### and there's no need for special casing any more.
1128 ### DO NOT quote things for IPC::Run, it breaks stuff.
1129 # $IPC::Cmd::USE_IPC_RUN
1131 # : QUOTE. $self->uri .QUOTE;
1136 unless(run( command => $cmd,
1137 buffer => \$captured,
1140 return $self->_error(loc("Command failed: %1", $captured || ''));
1143 ### print to local file ###
1144 ### XXX on a 404 with a special error page, $captured will actually
1145 ### hold the contents of that page, and make it *appear* like the
1146 ### request was a success, when really it wasn't :(
1147 ### there doesn't seem to be an option for lynx to change the exit
1148 ### code based on a 4XX status or so.
1149 ### the closest we can come is using --error_file and parsing that,
1150 ### which is very unreliable ;(
1151 $local->print( $captured );
1152 $local->close or return;
1157 ### use /bin/ncftp to fetch files
1164 to => { required => 1, store => \$to }
1166 check( $tmpl, \%hash ) or return;
1168 ### we can only set passive mode in interactive sessions, so bail out
1169 ### if $FTP_PASSIVE is set
1170 return if $FTP_PASSIVE;
1172 ### see if we have a ncftp binary ###
1174 unless( $ncftp = can_run('ncftp') ) {
1175 $METHOD_FAIL->{'ncftp'} = 1;
1181 '-V', # do not be verbose
1182 '-p', $FROM_EMAIL, # email as password
1183 $self->host, # hostname
1184 dirname($to), # local dir for the file
1185 # remote path to the file
1186 ### DO NOT quote things for IPC::Run, it breaks stuff.
1187 $IPC::Cmd::USE_IPC_RUN
1188 ? File::Spec::Unix->catdir( $self->path, $self->file )
1189 : QUOTE. File::Spec::Unix->catdir(
1190 $self->path, $self->file ) .QUOTE
1196 unless(run( command => $cmd,
1197 buffer => \$captured,
1200 return $self->_error(loc("Command failed: %1", $captured || ''));
1207 ### use /bin/curl to fetch files
1214 to => { required => 1, store => \$to }
1216 check( $tmpl, \%hash ) or return;
1218 unless ( $curl = can_run('curl') ) {
1219 $METHOD_FAIL->{'curl'} = 1;
1223 ### these long opts are self explanatory - I like that -jmb
1224 my $cmd = [ $curl, '-q' ];
1226 push(@$cmd, '-4') if $^O eq 'netbsd' && $FORCEIPV4; # only seen this on NetBSD so far
1228 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
1230 push(@$cmd, '--silent') unless $DEBUG;
1232 ### curl does the right thing with passive, regardless ###
1233 if ($self->scheme eq 'ftp') {
1234 push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
1237 ### curl doesn't follow 302 (temporarily moved) etc automatically
1238 ### so we add --location to enable that.
1239 push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
1241 ### with IPC::Cmd > 0.41, this is fixed in teh library,
1242 ### and there's no need for special casing any more.
1243 ### DO NOT quote things for IPC::Run, it breaks stuff.
1244 # $IPC::Cmd::USE_IPC_RUN
1245 # ? ($to, $self->uri)
1246 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1250 unless(run( command => $cmd,
1251 buffer => \$captured,
1255 return $self->_error(loc("Command failed: %1", $captured || ''));
1262 ### /usr/bin/fetch fetch! ###
1269 to => { required => 1, store => \$to }
1271 check( $tmpl, \%hash ) or return;
1273 ### see if we have a fetch binary ###
1275 unless( HAS_FETCH and $fetch = can_run('fetch') ) {
1276 $METHOD_FAIL->{'fetch'} = 1;
1280 ### no verboseness, thanks ###
1281 my $cmd = [ $fetch, '-q' ];
1283 ### if a timeout is set, add it ###
1284 push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT;
1286 ### run passive if specified ###
1287 #push @$cmd, '-p' if $FTP_PASSIVE;
1288 local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE;
1290 ### set the output document, add the uri ###
1291 push @$cmd, '-o', $to, $self->uri;
1293 ### with IPC::Cmd > 0.41, this is fixed in teh library,
1294 ### and there's no need for special casing any more.
1295 ### DO NOT quote things for IPC::Run, it breaks stuff.
1296 # $IPC::Cmd::USE_IPC_RUN
1297 # ? ($to, $self->uri)
1298 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1302 unless(run( command => $cmd,
1303 buffer => \$captured,
1306 ### wget creates the output document always, even if the fetch
1307 ### fails.. so unlink it in that case
1310 return $self->_error(loc( "Command failed: %1", $captured || '' ));
1316 ### use File::Copy for fetching file:// urls ###
1318 ### See section 3.10 of RFC 1738 (https://datatracker.ietf.org/doc/html/rfc1738#section-3.10)
1319 ### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
1328 to => { required => 1, store => \$to }
1330 check( $tmpl, \%hash ) or return;
1334 ### prefix a / on unix systems with a file uri, since it would
1335 ### look somewhat like this:
1336 ### file:///home/kane/file
1337 ### whereas windows file uris for 'c:\some\dir\file' might look like:
1338 ### file:///C:/some/dir/file
1339 ### file:///C|/some/dir/file
1340 ### or for a network share '\\host\share\some\dir\file':
1341 ### file:////host/share/some/dir/file
1343 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1344 ### file://vms.host.edu/disk$user/my/notes/note12345.txt
1347 my $path = $self->path;
1348 my $vol = $self->vol;
1349 my $share = $self->share;
1352 if (!$share and $self->host) {
1353 return $self->_error(loc(
1354 "Currently %1 cannot handle hosts in %2 urls",
1355 'File::Fetch', 'file://'
1360 $path = File::Spec->catdir( split /\//, $path );
1361 $remote = File::Spec->catpath( $vol, $path, $self->file);
1364 ### win32 specific, and a share name, so we wont bother with File::Spec
1366 $remote = "\\\\".$self->host."\\$share\\$path";
1369 ### File::Spec on VMS can not currently handle UNIX syntax.
1370 my $file_class = ON_VMS
1371 ? 'File::Spec::Unix'
1374 $remote = $file_class->catfile( $path, $self->file );
1377 ### File::Copy is littered with 'die' statements :( ###
1378 my $rv = eval { File::Copy::copy( $remote, $to ) };
1380 ### something went wrong ###
1382 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1383 $remote, $to, $!, $@));
1389 ### use /usr/bin/rsync to fetch files
1396 to => { required => 1, store => \$to }
1398 check( $tmpl, \%hash ) or return;
1400 unless ( $rsync = can_run('rsync') ) {
1401 $METHOD_FAIL->{'rsync'} = 1;
1405 my $cmd = [ $rsync ];
1407 ### XXX: rsync has no I/O timeouts at all, by default
1408 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1410 push(@$cmd, '--quiet') unless $DEBUG;
1412 ### DO NOT quote things for IPC::Run, it breaks stuff.
1413 push @$cmd, $self->uri, $to;
1415 ### with IPC::Cmd > 0.41, this is fixed in teh library,
1416 ### and there's no need for special casing any more.
1417 ### DO NOT quote things for IPC::Run, it breaks stuff.
1418 # $IPC::Cmd::USE_IPC_RUN
1419 # ? ($to, $self->uri)
1420 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1423 unless(run( command => $cmd,
1424 buffer => \$captured,
1428 return $self->_error(loc("Command %1 failed: %2",
1429 "@$cmd" || '', $captured || ''));
1436 ### use git to fetch files
1443 to => { required => 1, store => \$to }
1445 check( $tmpl, \%hash ) or return;
1447 unless ( $git = can_run('git') ) {
1448 $METHOD_FAIL->{'git'} = 1;
1452 my $cmd = [ $git, 'clone' ];
1454 #push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1456 push(@$cmd, '--quiet') unless $DEBUG;
1458 ### DO NOT quote things for IPC::Run, it breaks stuff.
1459 push @$cmd, $self->uri, $to;
1461 ### with IPC::Cmd > 0.41, this is fixed in teh library,
1462 ### and there's no need for special casing any more.
1463 ### DO NOT quote things for IPC::Run, it breaks stuff.
1464 # $IPC::Cmd::USE_IPC_RUN
1465 # ? ($to, $self->uri)
1466 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1469 unless(run( command => $cmd,
1470 buffer => \$captured,
1474 return $self->_error(loc("Command %1 failed: %2",
1475 "@$cmd" || '', $captured || ''));
1482 #################################
1486 #################################
1490 =head2 $ff->error([BOOL])
1492 Returns the last encountered error as string.
1493 Pass it a true value to get the C<Carp::longmess()> output instead.
1497 ### error handling the way Archive::Extract does it
1502 $self->_error_msg( $error );
1503 $self->_error_msg_long( Carp::longmess($error) );
1506 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1514 return shift() ? $self->_error_msg_long : $self->_error_msg;
1524 File::Fetch is able to fetch a variety of uris, by using several
1525 external programs and modules.
1527 Below is a mapping of what utilities will be used in what order
1528 for what schemes, if available:
1530 file => LWP, lftp, file
1531 http => LWP, HTTP::Tiny, wget, curl, lftp, fetch, HTTP::Lite, lynx, iosock
1532 ftp => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp
1536 If you'd like to disable the use of one or more of these utilities
1537 and/or modules, see the C<$BLACKLIST> variable further down.
1539 If a utility or module isn't available, it will be marked in a cache
1540 (see the C<$METHOD_FAIL> variable further down), so it will not be
1541 tried again. The C<fetch> method will only fail when all options are
1542 exhausted, and it was not able to retrieve the file.
1544 The C<fetch> utility is available on FreeBSD. NetBSD and Dragonfly BSD
1545 may also have it from C<pkgsrc>. We only check for C<fetch> on those
1548 C<iosock> is a very limited L<IO::Socket::INET> based mechanism for
1549 retrieving C<http> schemed urls. It doesn't follow redirects for instance.
1551 C<git> only supports C<git://> style urls.
1553 A special note about fetching files from an ftp uri:
1555 By default, all ftp connections are done in passive mode. To change
1556 that, see the C<$FTP_PASSIVE> variable further down.
1558 Furthermore, ftp uris only support anonymous connections, so no
1559 named user/password pair can be passed along.
1561 C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1564 =head1 GLOBAL VARIABLES
1566 The behaviour of File::Fetch can be altered by changing the following
1569 =head2 $File::Fetch::FROM_EMAIL
1571 This is the email address that will be sent as your anonymous ftp
1574 Default is C<File-Fetch@example.com>.
1576 =head2 $File::Fetch::USER_AGENT
1578 This is the useragent as C<LWP> will report it.
1580 Default is C<File::Fetch/$VERSION>.
1582 =head2 $File::Fetch::FTP_PASSIVE
1584 This variable controls whether the environment variable C<FTP_PASSIVE>
1585 and any passive switches to commandline tools will be set to true.
1589 Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1590 files, since passive mode can only be set interactively for this binary
1592 =head2 $File::Fetch::TIMEOUT
1594 When set, controls the network timeout (counted in seconds).
1598 =head2 $File::Fetch::WARN
1600 This variable controls whether errors encountered internally by
1601 C<File::Fetch> should be C<carp>'d or not.
1603 Set to false to silence warnings. Inspect the output of the C<error()>
1604 method manually to see what went wrong.
1606 Defaults to C<true>.
1608 =head2 $File::Fetch::DEBUG
1610 This enables debugging output when calling commandline utilities to
1612 This also enables C<Carp::longmess> errors, instead of the regular
1615 Good for tracking down why things don't work with your particular
1620 =head2 $File::Fetch::BLACKLIST
1622 This is an array ref holding blacklisted modules/utilities for fetching
1625 To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1626 set $File::Fetch::BLACKLIST to:
1628 $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1630 The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1632 See the note on C<MAPPING> below.
1634 =head2 $File::Fetch::METHOD_FAIL
1636 This is a hashref registering what modules/utilities were known to fail
1637 for fetching files (mostly because they weren't installed).
1639 You can reset this cache by assigning an empty hashref to it, or
1640 individually remove keys.
1642 See the note on C<MAPPING> below.
1647 Here's a quick mapping for the utilities/modules, and their names for
1648 the $BLACKLIST, $METHOD_FAIL and other internal functions.
1651 HTTP::Lite => httplite
1652 HTTP::Tiny => httptiny
1662 IO::Socket => iosock
1664 =head1 FREQUENTLY ASKED QUESTIONS
1666 =head2 So how do I use a proxy with File::Fetch?
1668 C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1669 You will need to set your environment variables accordingly. For
1670 example, to use an ftp proxy:
1672 $ENV{ftp_proxy} = 'foo.com';
1674 Refer to the LWP::UserAgent manpage for more details.
1676 =head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1678 C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1679 which we in turn capture. If that content is a 'custom' error file
1680 (like, say, a C<404 handler>), you will get that contents instead.
1682 Sadly, C<lynx> doesn't support any options to return a different exit
1683 code on non-C<200 OK> status, giving us no way to tell the difference
1684 between a 'successful' fetch and a custom error page.
1686 Therefor, we recommend to only use C<lynx> as a last resort. This is
1687 why it is at the back of our list of methods to try as well.
1689 =head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1691 C<File::Fetch> is relatively smart about things. When trying to write
1692 a file to disk, it removes the C<query parameters> (see the
1693 C<output_file> method for details) from the file name before creating
1694 it. In most cases this suffices.
1696 If you have any other characters you need to escape, please install
1697 the C<URI::Escape> module from CPAN, and pre-encode your URI before
1698 passing it to C<File::Fetch>. You can read about the details of URIs
1699 and URI encoding here:
1701 L<https://datatracker.ietf.org/doc/html/rfc2396>
1707 =item Implement $PREFER_BIN
1709 To indicate to rather use commandline tools than modules
1715 Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1719 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1723 This library is free software; you may redistribute and/or modify it
1724 under the same terms as Perl itself.
1730 # c-indentation-style: bsd
1732 # indent-tabs-mode: nil
1734 # vim: expandtab shiftwidth=4: