This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update File-Fetch to CPAN version 0.24
[perl5.git] / cpan / File-Fetch / lib / File / Fetch.pm
CommitLineData
79fd8837
JB
1package File::Fetch;
2
3use strict;
4use FileHandle;
6e654618 5use File::Temp;
79fd8837
JB
6use File::Copy;
7use File::Spec;
8use File::Spec::Unix;
79fd8837
JB
9use File::Basename qw[dirname];
10
11use Cwd qw[cwd];
12use Carp qw[carp];
6b6e6e92 13use IPC::Cmd qw[can_run run QUOTE];
79fd8837 14use File::Path qw[mkpath];
8d16e270 15use File::Temp qw[tempdir];
79fd8837
JB
16use Params::Check qw[check];
17use Module::Load::Conditional qw[can_load];
18use Locale::Maketext::Simple Style => 'gettext';
19
20use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
21 $BLACKLIST $METHOD_FAIL $VERSION $METHODS
22 $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
23 ];
24
16610ad9 25$VERSION = '0.24';
fe98d82b
RGS
26$VERSION = eval $VERSION; # avoid warnings with development releases
27$PREFER_BIN = 0; # XXX TODO implement
79fd8837 28$FROM_EMAIL = 'File-Fetch@example.com';
6b6e6e92 29$USER_AGENT = "File::Fetch/$VERSION";
79fd8837
JB
30$BLACKLIST = [qw|ftp|];
31$METHOD_FAIL = { };
32$FTP_PASSIVE = 1;
33$TIMEOUT = 0;
34$DEBUG = 0;
35$WARN = 1;
36
37### methods available to fetch the file depending on the scheme
38$METHODS = {
314f5584 39 http => [ qw|lwp wget curl lftp lynx iosock| ],
6e654618
JB
40 ftp => [ qw|lwp netftp wget curl lftp ncftp ftp| ],
41 file => [ qw|lwp lftp file| ],
79fd8837
JB
42 rsync => [ qw|rsync| ]
43};
44
45### silly warnings ###
46local $Params::Check::VERBOSE = 1;
47local $Params::Check::VERBOSE = 1;
48local $Module::Load::Conditional::VERBOSE = 0;
49local $Module::Load::Conditional::VERBOSE = 0;
50
51### see what OS we are on, important for file:// uris ###
6b6e6e92
SH
52use constant ON_WIN => ($^O eq 'MSWin32');
53use constant ON_VMS => ($^O eq 'VMS');
54use constant ON_UNIX => (!ON_WIN);
55use constant HAS_VOL => (ON_WIN);
56use constant HAS_SHARE => (ON_WIN);
57
58
79fd8837
JB
59=pod
60
61=head1 NAME
62
63File::Fetch - A generic file fetching mechanism
64
65=head1 SYNOPSIS
66
67 use File::Fetch;
68
69 ### build a File::Fetch object ###
70 my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
71
72 ### fetch the uri to cwd() ###
73 my $where = $ff->fetch() or die $ff->error;
74
75 ### fetch the uri to /tmp ###
76 my $where = $ff->fetch( to => '/tmp' );
77
78 ### parsed bits from the uri ###
79 $ff->uri;
80 $ff->scheme;
81 $ff->host;
82 $ff->path;
83 $ff->file;
84
85=head1 DESCRIPTION
86
87File::Fetch is a generic file fetching mechanism.
88
89It allows you to fetch any file pointed to by a C<ftp>, C<http>,
90C<file>, or C<rsync> uri by a number of different means.
91
92See the C<HOW IT WORKS> section further down for details.
93
d4b3706f
RGS
94=head1 ACCESSORS
95
96A C<File::Fetch> object has the following accessors
97
98=over 4
99
100=item $ff->uri
101
102The uri you passed to the constructor
103
104=item $ff->scheme
105
106The scheme from the uri (like 'file', 'http', etc)
107
108=item $ff->host
109
fe98d82b
RGS
110The hostname in the uri. Will be empty if host was originally
111'localhost' for a 'file://' url.
112
113=item $ff->vol
114
115On operating systems with the concept of a volume the second element
116of a file:// is considered to the be volume specification for the file.
1f80753b
JM
117Thus on Win32 this routine returns the volume, on other operating
118systems this returns nothing.
fe98d82b
RGS
119
120On Windows this value may be empty if the uri is to a network share, in
121which case the 'share' property will be defined. Additionally, volume
122specifications that use '|' as ':' will be converted on read to use ':'.
123
1f80753b
JM
124On VMS, which has a volume concept, this field will be empty because VMS
125file specifications are converted to absolute UNIX format and the volume
126information is transparently included.
127
fe98d82b
RGS
128=item $ff->share
129
130On systems with the concept of a network share (currently only Windows) returns
131the sharename from a file://// url. On other operating systems returns empty.
d4b3706f
RGS
132
133=item $ff->path
134
135The path from the uri, will be at least a single '/'.
136
137=item $ff->file
138
139The name of the remote file. For the local file name, the
140result of $ff->output_file will be used.
141
142=cut
143
144
145##########################
146### Object & Accessors ###
147##########################
148
149{
6b6e6e92 150 ### template for autogenerated accessors ###
d4b3706f
RGS
151 my $Tmpl = {
152 scheme => { default => 'http' },
153 host => { default => 'localhost' },
154 path => { default => '/' },
155 file => { required => 1 },
156 uri => { required => 1 },
1f80753b 157 vol => { default => '' }, # windows for file:// uris
fe98d82b 158 share => { default => '' }, # windows for file:// uris
d4b3706f
RGS
159 _error_msg => { no_override => 1 },
160 _error_msg_long => { no_override => 1 },
161 };
162
163 for my $method ( keys %$Tmpl ) {
164 no strict 'refs';
165 *$method = sub {
166 my $self = shift;
167 $self->{$method} = $_[0] if @_;
168 return $self->{$method};
169 }
170 }
171
172 sub _create {
173 my $class = shift;
174 my %hash = @_;
175
176 my $args = check( $Tmpl, \%hash ) or return;
177
178 bless $args, $class;
179
180 if( lc($args->scheme) ne 'file' and not $args->host ) {
16610ad9 181 return $class->_error(loc(
d4b3706f
RGS
182 "Hostname required when fetching from '%1'",$args->scheme));
183 }
184
185 for (qw[path file]) {
9e5ea595 186 unless( $args->$_() ) { # 5.5.x needs the ()
16610ad9 187 return $class->_error(loc("No '%1' specified",$_));
d4b3706f
RGS
188 }
189 }
190
191 return $args;
192 }
193}
194
195=item $ff->output_file
196
197The name of the output file. This is the same as $ff->file,
198but any query parameters are stripped off. For example:
199
200 http://example.com/index.html?x=y
201
202would make the output file be C<index.html> rather than
203C<index.html?x=y>.
204
205=back
206
207=cut
208
209sub output_file {
210 my $self = shift;
211 my $file = $self->file;
212
213 $file =~ s/\?.*$//g;
214
215 return $file;
216}
217
218### XXX do this or just point to URI::Escape?
219# =head2 $esc_uri = $ff->escaped_uri
220#
221# =cut
222#
223# ### most of this is stolen straight from URI::escape
224# { ### Build a char->hex map
225# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
226#
227# sub escaped_uri {
228# my $self = shift;
229# my $uri = $self->uri;
230#
231# ### Default unsafe characters. RFC 2732 ^(uric - reserved)
232# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
233# $escapes{$1} || $self->_fail_hi($1)/ge;
234#
235# return $uri;
236# }
237#
238# sub _fail_hi {
239# my $self = shift;
240# my $char = shift;
241#
242# $self->_error(loc(
243# "Can't escape '%1', try using the '%2' module instead",
244# sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
245# ));
246# }
247#
248# sub output_file {
249#
250# }
251#
252#
253# }
254
79fd8837
JB
255=head1 METHODS
256
257=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
258
259Parses the uri and creates a corresponding File::Fetch::Item object,
260that is ready to be C<fetch>ed and returns it.
261
262Returns false on failure.
263
264=cut
265
266sub new {
267 my $class = shift;
268 my %hash = @_;
269
270 my ($uri);
271 my $tmpl = {
272 uri => { required => 1, store => \$uri },
273 };
274
275 check( $tmpl, \%hash ) or return;
276
277 ### parse the uri to usable parts ###
16610ad9 278 my $href = $class->_parse_uri( $uri ) or return;
79fd8837
JB
279
280 ### make it into a FFI object ###
16610ad9 281 my $ff = $class->_create( %$href ) or return;
79fd8837
JB
282
283
284 ### return the object ###
d4b3706f 285 return $ff;
79fd8837
JB
286}
287
288### parses an uri to a hash structure:
289###
290### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
291###
292### becomes:
293###
294### $href = {
295### scheme => 'ftp',
296### host => 'ftp.cpan.org',
297### path => '/pub/mirror',
298### file => 'index.html'
299### };
300###
9e5ea595
RGS
301### In the case of file:// urls there maybe be additional fields
302###
5e6d05d2 303### For systems with volume specifications such as Win32 there will be
fe98d82b
RGS
304### a volume specifier provided in the 'vol' field.
305###
306### 'vol' => 'volumename'
307###
9e5ea595
RGS
308### For windows file shares there may be a 'share' key specified
309###
310### 'share' => 'sharename'
311###
fe98d82b
RGS
312### Note that the rules of what a file:// url means vary by the operating system
313### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
314### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
315### not '/foo/bar.txt'
9e5ea595 316###
fe98d82b
RGS
317### Similarly if the host interpreting the url is VMS then
318### file:///disk$user/my/notes/note12345.txt' means
1f80753b
JM
319### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
320### if it is unix where it means /disk$user/my/notes/note12345.txt'.
321### Except for some cases in the File::Spec methods, Perl on VMS will generally
322### handle UNIX format file specifications.
fe98d82b
RGS
323###
324### This means it is impossible to serve certain file:// urls on certain systems.
325###
326### Thus are the problems with a protocol-less specification. :-(
9e5ea595
RGS
327###
328
79fd8837
JB
329sub _parse_uri {
330 my $self = shift;
331 my $uri = shift or return;
332
333 my $href = { uri => $uri };
334
335 ### find the scheme ###
336 $uri =~ s|^(\w+)://||;
337 $href->{scheme} = $1;
338
9e5ea595
RGS
339 ### See rfc 1738 section 3.10
340 ### http://www.faqs.org/rfcs/rfc1738.html
341 ### And wikipedia for more on windows file:// urls
342 ### http://en.wikipedia.org/wiki/File://
79fd8837 343 if( $href->{scheme} eq 'file' ) {
9e5ea595
RGS
344
345 my @parts = split '/',$uri;
346
347 ### file://hostname/...
348 ### file://hostname/...
fe98d82b 349 ### normalize file://localhost with file:///
9e5ea595
RGS
350 $href->{host} = $parts[0] || '';
351
352 ### index in @parts where the path components begin;
353 my $index = 1;
9e5ea595
RGS
354
355 ### file:////hostname/sharename/blah.txt
fe98d82b
RGS
356 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
357
9e5ea595
RGS
358 $href->{host} = $parts[2] || ''; # avoid warnings
359 $href->{share} = $parts[3] || ''; # avoid warnings
360
361 $index = 4 # index after the share
9e5ea595 362
fe98d82b
RGS
363 ### file:///D|/blah.txt
364 ### file:///D:/blah.txt
fe98d82b
RGS
365 } elsif (HAS_VOL) {
366
367 ### this code comes from dmq's patch, but:
368 ### XXX if volume is empty, wouldn't that be an error? --kane
369 ### if so, our file://localhost test needs to be fixed as wel
370 $href->{vol} = $parts[1] || '';
371
372 ### correct D| style colume descriptors
373 $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
374
375 $index = 2; # index after the volume
376 }
377
378 ### rebuild the path from the leftover parts;
9e5ea595 379 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
79fd8837
JB
380
381 } else {
9e5ea595
RGS
382 ### using anything but qw() in hash slices may produce warnings
383 ### in older perls :-(
384 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
79fd8837
JB
385 }
386
387 ### split the path into file + dir ###
388 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
389 $href->{path} = $parts[1];
390 $href->{file} = $parts[2];
391 }
392
fe98d82b
RGS
393 ### host will be empty if the target was 'localhost' and the
394 ### scheme was 'file'
395 $href->{host} = '' if ($href->{host} eq 'localhost') and
396 ($href->{scheme} eq 'file');
79fd8837
JB
397
398 return $href;
399}
400
8d16e270 401=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
79fd8837 402
8d16e270
JB
403Fetches the file you requested and returns the full path to the file.
404
405By default it writes to C<cwd()>, but you can override that by specifying
406the C<to> argument:
407
408 ### file fetch to /tmp, full path to the file in $where
409 $where = $ff->fetch( to => '/tmp' );
410
411 ### file slurped into $scalar, full path to the file in $where
412 ### file is downloaded to a temp directory and cleaned up at exit time
413 $where = $ff->fetch( to => \$scalar );
79fd8837
JB
414
415Returns the full path to the downloaded file on success, and false
416on failure.
417
418=cut
419
420sub fetch {
421 my $self = shift or return;
422 my %hash = @_;
423
8d16e270 424 my $target;
79fd8837 425 my $tmpl = {
8d16e270 426 to => { default => cwd(), store => \$target },
79fd8837
JB
427 };
428
429 check( $tmpl, \%hash ) or return;
430
8d16e270
JB
431 my ($to, $fh);
432 ### you want us to slurp the contents
433 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
434 $to = tempdir( 'FileFetch.XXXXXX', CLEANUP => 1 );
435
436 ### plain old fetch
437 } else {
438 $to = $target;
1f80753b 439
8d16e270
JB
440 ### On VMS force to VMS format so File::Spec will work.
441 $to = VMS::Filespec::vmspath($to) if ON_VMS;
79fd8837 442
8d16e270
JB
443 ### create the path if it doesn't exist yet ###
444 unless( -d $to ) {
445 eval { mkpath( $to ) };
446
447 return $self->_error(loc("Could not create path '%1'",$to)) if $@;
448 }
79fd8837
JB
449 }
450
451 ### set passive ftp if required ###
452 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
453
fe98d82b
RGS
454 ### we dont use catfile on win32 because if we are using a cygwin tool
455 ### under cmd.exe they wont understand windows style separators.
456 my $out_to = ON_WIN ? $to.'/'.$self->output_file
457 : File::Spec->catfile( $to, $self->output_file );
458
79fd8837
JB
459 for my $method ( @{ $METHODS->{$self->scheme} } ) {
460 my $sub = '_'.$method.'_fetch';
461
462 unless( __PACKAGE__->can($sub) ) {
463 $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
464 $method));
465 next;
466 }
467
468 ### method is blacklisted ###
469 next if grep { lc $_ eq $method } @$BLACKLIST;
470
471 ### method is known to fail ###
472 next if $METHOD_FAIL->{$method};
473
d4b3706f
RGS
474 ### there's serious issues with IPC::Run and quoting of command
475 ### line arguments. using quotes in the wrong place breaks things,
476 ### and in the case of say,
477 ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
478 ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
479 ### it doesn't matter how you quote, it always fails.
480 local $IPC::Cmd::USE_IPC_RUN = 0;
481
482 if( my $file = $self->$sub(
a0ad4830 483 to => $out_to
d4b3706f 484 )){
79fd8837
JB
485
486 unless( -e $file && -s _ ) {
487 $self->_error(loc("'%1' said it fetched '%2', ".
488 "but it was not created",$method,$file));
489
490 ### mark the failure ###
491 $METHOD_FAIL->{$method} = 1;
492
493 next;
494
495 } else {
496
8d16e270
JB
497 ### slurp mode?
498 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
499
500 ### open the file
501 open my $fh, $file or do {
502 $self->_error(
503 loc("Could not open '%1': %2", $file, $!));
504 return;
505 };
506
507 ### slurp
508 $$target = do { local $/; <$fh> };
509
510 }
511
79fd8837
JB
512 my $abs = File::Spec->rel2abs( $file );
513 return $abs;
8d16e270 514
79fd8837
JB
515 }
516 }
517 }
518
519
520 ### if we got here, we looped over all methods, but we weren't able
521 ### to fetch it.
522 return;
523}
524
79fd8837
JB
525########################
526### _*_fetch methods ###
527########################
528
529### LWP fetching ###
530sub _lwp_fetch {
531 my $self = shift;
532 my %hash = @_;
533
534 my ($to);
535 my $tmpl = {
536 to => { required => 1, store => \$to }
537 };
538 check( $tmpl, \%hash ) or return;
539
540 ### modules required to download with lwp ###
541 my $use_list = {
542 LWP => '0.0',
543 'LWP::UserAgent' => '0.0',
544 'HTTP::Request' => '0.0',
545 'HTTP::Status' => '0.0',
546 URI => '0.0',
547
548 };
549
550 if( can_load(modules => $use_list) ) {
551
552 ### setup the uri object
553 my $uri = URI->new( File::Spec::Unix->catfile(
554 $self->path, $self->file
555 ) );
556
557 ### special rules apply for file:// uris ###
558 $uri->scheme( $self->scheme );
559 $uri->host( $self->scheme eq 'file' ? '' : $self->host );
560 $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
561
562 ### set up the useragent object
563 my $ua = LWP::UserAgent->new();
564 $ua->timeout( $TIMEOUT ) if $TIMEOUT;
565 $ua->agent( $USER_AGENT );
566 $ua->from( $FROM_EMAIL );
567 $ua->env_proxy;
568
569 my $res = $ua->mirror($uri, $to) or return;
570
571 ### uptodate or fetched ok ###
572 if ( $res->code == 304 or $res->code == 200 ) {
573 return $to;
574
575 } else {
576 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
577 $res->code, HTTP::Status::status_message($res->code),
578 $res->status_line));
579 }
580
581 } else {
582 $METHOD_FAIL->{'lwp'} = 1;
583 return;
584 }
585}
586
314f5584
CBW
587### Simple IO::Socket::INET fetching ###
588sub _iosock_fetch {
589 my $self = shift;
590 my %hash = @_;
591
592 my ($to);
593 my $tmpl = {
594 to => { required => 1, store => \$to }
595 };
596 check( $tmpl, \%hash ) or return;
597
598 my $use_list = {
599 'IO::Socket::INET' => '0.0',
600 'IO::Select' => '0.0',
601 };
602
603 if( can_load(modules => $use_list) ) {
604 my $sock = IO::Socket::INET->new(
605 PeerHost => $self->host,
606 ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ),
607 );
608
609 unless ( $sock ) {
610 return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!));
611 }
612
613 my $fh = FileHandle->new;
614
615 # Check open()
616
617 unless ( $fh->open($to,'>') ) {
618 return $self->_error(loc(
619 "Could not open '%1' for writing: %2",$to,$!));
620 }
621
af24cc9d
CBW
622 my $path = File::Spec::Unix->catfile( $self->path, $self->file );
623 my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a";
624 $sock->send( $req );
314f5584
CBW
625
626 my $select = IO::Select->new( $sock );
627
628 my $resp = '';
629 my $normal = 0;
630 while ( $select->can_read( $TIMEOUT || 60 ) ) {
631 my $ret = $sock->sysread( $resp, 4096, length($resp) );
632 if ( !defined $ret or $ret == 0 ) {
633 $select->remove( $sock );
634 $normal++;
635 }
636 }
637 close $sock;
638
639 unless ( $normal ) {
640 return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 )));
641 }
642
af24cc9d
CBW
643 # Check the "response"
644 # Strip preceeding blank lines apparently they are allowed (RFC 2616 4.1)
645 $resp =~ s/^(\x0d?\x0a)+//;
646 # Check it is an HTTP response
647 unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) {
648 return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host));
649 }
650
651 # Check for OK
652 my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i;
653 unless ( $code eq '200' ) {
654 return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host));
655 }
656
314f5584
CBW
657 print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0];
658 close $fh;
659 return $to;
660
661 } else {
662 $METHOD_FAIL->{'iosock'} = 1;
663 return;
664 }
665}
666
79fd8837
JB
667### Net::FTP fetching
668sub _netftp_fetch {
669 my $self = shift;
670 my %hash = @_;
671
672 my ($to);
673 my $tmpl = {
674 to => { required => 1, store => \$to }
675 };
676 check( $tmpl, \%hash ) or return;
677
678 ### required modules ###
679 my $use_list = { 'Net::FTP' => 0 };
680
681 if( can_load( modules => $use_list ) ) {
682
683 ### make connection ###
684 my $ftp;
685 my @options = ($self->host);
686 push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
687 unless( $ftp = Net::FTP->new( @options ) ) {
688 return $self->_error(loc("Ftp creation failed: %1",$@));
689 }
690
691 ### login ###
692 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
693 return $self->_error(loc("Could not login to '%1'",$self->host));
694 }
695
696 ### set binary mode, just in case ###
697 $ftp->binary;
698
699 ### create the remote path
700 ### remember remote paths are unix paths! [#11483]
701 my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
702
703 ### fetch the file ###
704 my $target;
705 unless( $target = $ftp->get( $remote, $to ) ) {
706 return $self->_error(loc("Could not fetch '%1' from '%2'",
707 $remote, $self->host));
708 }
709
710 ### log out ###
711 $ftp->quit;
712
713 return $target;
714
715 } else {
716 $METHOD_FAIL->{'netftp'} = 1;
717 return;
718 }
719}
720
721### /bin/wget fetch ###
722sub _wget_fetch {
723 my $self = shift;
724 my %hash = @_;
725
726 my ($to);
727 my $tmpl = {
728 to => { required => 1, store => \$to }
729 };
730 check( $tmpl, \%hash ) or return;
731
732 ### see if we have a wget binary ###
733 if( my $wget = can_run('wget') ) {
734
735 ### no verboseness, thanks ###
736 my $cmd = [ $wget, '--quiet' ];
737
738 ### if a timeout is set, add it ###
739 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
740
741 ### run passive if specified ###
742 push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
743
744 ### set the output document, add the uri ###
6e654618
JB
745 push @$cmd, '--output-document', $to, $self->uri;
746
747 ### with IPC::Cmd > 0.41, this is fixed in teh library,
748 ### and there's no need for special casing any more.
749 ### DO NOT quote things for IPC::Run, it breaks stuff.
750 # $IPC::Cmd::USE_IPC_RUN
751 # ? ($to, $self->uri)
752 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837
JB
753
754 ### shell out ###
755 my $captured;
d4b3706f
RGS
756 unless(run( command => $cmd,
757 buffer => \$captured,
758 verbose => $DEBUG
759 )) {
79fd8837
JB
760 ### wget creates the output document always, even if the fetch
761 ### fails.. so unlink it in that case
762 1 while unlink $to;
763
764 return $self->_error(loc( "Command failed: %1", $captured || '' ));
765 }
766
767 return $to;
768
769 } else {
770 $METHOD_FAIL->{'wget'} = 1;
771 return;
772 }
773}
774
6e654618
JB
775### /bin/lftp fetch ###
776sub _lftp_fetch {
777 my $self = shift;
778 my %hash = @_;
779
780 my ($to);
781 my $tmpl = {
782 to => { required => 1, store => \$to }
783 };
784 check( $tmpl, \%hash ) or return;
785
786 ### see if we have a wget binary ###
787 if( my $lftp = can_run('lftp') ) {
788
789 ### no verboseness, thanks ###
790 my $cmd = [ $lftp, '-f' ];
791
792 my $fh = File::Temp->new;
793
794 my $str;
795
796 ### if a timeout is set, add it ###
797 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
798
799 ### run passive if specified ###
800 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
801
802 ### set the output document, add the uri ###
803 ### quote the URI, because lftp supports certain shell
804 ### expansions, most notably & for backgrounding.
805 ### ' quote does nto work, must be "
806 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
807
808 if( $DEBUG ) {
809 my $pp_str = join ' ', split $/, $str;
810 print "# lftp command: $pp_str\n";
811 }
812
813 ### write straight to the file.
814 $fh->autoflush(1);
815 print $fh $str;
816
817 ### the command needs to be 1 string to be executed
818 push @$cmd, $fh->filename;
819
820 ### with IPC::Cmd > 0.41, this is fixed in teh library,
821 ### and there's no need for special casing any more.
822 ### DO NOT quote things for IPC::Run, it breaks stuff.
823 # $IPC::Cmd::USE_IPC_RUN
824 # ? ($to, $self->uri)
825 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
826
827
828 ### shell out ###
829 my $captured;
830 unless(run( command => $cmd,
831 buffer => \$captured,
832 verbose => $DEBUG
833 )) {
834 ### wget creates the output document always, even if the fetch
835 ### fails.. so unlink it in that case
836 1 while unlink $to;
837
838 return $self->_error(loc( "Command failed: %1", $captured || '' ));
839 }
840
841 return $to;
842
843 } else {
844 $METHOD_FAIL->{'lftp'} = 1;
845 return;
846 }
847}
848
849
79fd8837
JB
850
851### /bin/ftp fetch ###
852sub _ftp_fetch {
853 my $self = shift;
854 my %hash = @_;
855
856 my ($to);
857 my $tmpl = {
858 to => { required => 1, store => \$to }
859 };
860 check( $tmpl, \%hash ) or return;
861
d4b3706f 862 ### see if we have a ftp binary ###
79fd8837
JB
863 if( my $ftp = can_run('ftp') ) {
864
865 my $fh = FileHandle->new;
866
867 local $SIG{CHLD} = 'IGNORE';
868
869 unless ($fh->open("|$ftp -n")) {
870 return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
871 }
872
873 my @dialog = (
874 "lcd " . dirname($to),
875 "open " . $self->host,
876 "user anonymous $FROM_EMAIL",
877 "cd /",
878 "cd " . $self->path,
879 "binary",
d4b3706f 880 "get " . $self->file . " " . $self->output_file,
79fd8837
JB
881 "quit",
882 );
883
884 foreach (@dialog) { $fh->print($_, "\n") }
885 $fh->close or return;
886
887 return $to;
888 }
889}
890
891### lynx is stupid - it decompresses any .gz file it finds to be text
892### use /bin/lynx to fetch files
893sub _lynx_fetch {
894 my $self = shift;
895 my %hash = @_;
896
897 my ($to);
898 my $tmpl = {
899 to => { required => 1, store => \$to }
900 };
901 check( $tmpl, \%hash ) or return;
902
d4b3706f 903 ### see if we have a lynx binary ###
79fd8837
JB
904 if( my $lynx = can_run('lynx') ) {
905
d4b3706f
RGS
906 unless( IPC::Cmd->can_capture_buffer ) {
907 $METHOD_FAIL->{'lynx'} = 1;
908
909 return $self->_error(loc(
910 "Can not capture buffers. Can not use '%1' to fetch files",
911 'lynx' ));
912 }
79fd8837 913
6e654618
JB
914 ### check if the HTTP resource exists ###
915 if ($self->uri =~ /^https?:\/\//i) {
916 my $cmd = [
917 $lynx,
918 '-head',
919 '-source',
920 "-auth=anonymous:$FROM_EMAIL",
921 ];
922
923 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
924
925 push @$cmd, $self->uri;
926
927 ### shell out ###
928 my $head;
929 unless(run( command => $cmd,
930 buffer => \$head,
931 verbose => $DEBUG )
932 ) {
933 return $self->_error(loc("Command failed: %1", $head || ''));
934 }
935
936 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
937 return $self->_error(loc("Command failed: %1", $head || ''));
938 }
939 }
940
79fd8837
JB
941 ### write to the output file ourselves, since lynx ass_u_mes to much
942 my $local = FileHandle->new(">$to")
943 or return $self->_error(loc(
944 "Could not open '%1' for writing: %2",$to,$!));
945
946 ### dump to stdout ###
947 my $cmd = [
948 $lynx,
949 '-source',
950 "-auth=anonymous:$FROM_EMAIL",
951 ];
952
953 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
954
d4b3706f 955 ### DO NOT quote things for IPC::Run, it breaks stuff.
6e654618
JB
956 push @$cmd, $self->uri;
957
958 ### with IPC::Cmd > 0.41, this is fixed in teh library,
959 ### and there's no need for special casing any more.
960 ### DO NOT quote things for IPC::Run, it breaks stuff.
961 # $IPC::Cmd::USE_IPC_RUN
962 # ? $self->uri
963 # : QUOTE. $self->uri .QUOTE;
d4b3706f 964
79fd8837
JB
965
966 ### shell out ###
967 my $captured;
968 unless(run( command => $cmd,
969 buffer => \$captured,
970 verbose => $DEBUG )
971 ) {
972 return $self->_error(loc("Command failed: %1", $captured || ''));
973 }
974
975 ### print to local file ###
976 ### XXX on a 404 with a special error page, $captured will actually
977 ### hold the contents of that page, and make it *appear* like the
978 ### request was a success, when really it wasn't :(
979 ### there doesn't seem to be an option for lynx to change the exit
980 ### code based on a 4XX status or so.
981 ### the closest we can come is using --error_file and parsing that,
982 ### which is very unreliable ;(
983 $local->print( $captured );
984 $local->close or return;
985
986 return $to;
987
988 } else {
989 $METHOD_FAIL->{'lynx'} = 1;
990 return;
991 }
992}
993
994### use /bin/ncftp to fetch files
995sub _ncftp_fetch {
996 my $self = shift;
997 my %hash = @_;
998
999 my ($to);
1000 my $tmpl = {
1001 to => { required => 1, store => \$to }
1002 };
1003 check( $tmpl, \%hash ) or return;
1004
1005 ### we can only set passive mode in interactive sesssions, so bail out
1006 ### if $FTP_PASSIVE is set
1007 return if $FTP_PASSIVE;
1008
d4b3706f 1009 ### see if we have a ncftp binary ###
79fd8837
JB
1010 if( my $ncftp = can_run('ncftp') ) {
1011
1012 my $cmd = [
1013 $ncftp,
1014 '-V', # do not be verbose
1015 '-p', $FROM_EMAIL, # email as password
1016 $self->host, # hostname
1017 dirname($to), # local dir for the file
1018 # remote path to the file
d4b3706f
RGS
1019 ### DO NOT quote things for IPC::Run, it breaks stuff.
1020 $IPC::Cmd::USE_IPC_RUN
1021 ? File::Spec::Unix->catdir( $self->path, $self->file )
1022 : QUOTE. File::Spec::Unix->catdir(
1023 $self->path, $self->file ) .QUOTE
1024
79fd8837
JB
1025 ];
1026
1027 ### shell out ###
1028 my $captured;
1029 unless(run( command => $cmd,
1030 buffer => \$captured,
1031 verbose => $DEBUG )
1032 ) {
1033 return $self->_error(loc("Command failed: %1", $captured || ''));
1034 }
1035
1036 return $to;
1037
1038 } else {
1039 $METHOD_FAIL->{'ncftp'} = 1;
1040 return;
1041 }
1042}
1043
1044### use /bin/curl to fetch files
1045sub _curl_fetch {
1046 my $self = shift;
1047 my %hash = @_;
1048
1049 my ($to);
1050 my $tmpl = {
1051 to => { required => 1, store => \$to }
1052 };
1053 check( $tmpl, \%hash ) or return;
1054
1055 if (my $curl = can_run('curl')) {
1056
1057 ### these long opts are self explanatory - I like that -jmb
6e654618 1058 my $cmd = [ $curl, '-q' ];
79fd8837
JB
1059
1060 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
1061
1062 push(@$cmd, '--silent') unless $DEBUG;
1063
1064 ### curl does the right thing with passive, regardless ###
1065 if ($self->scheme eq 'ftp') {
1066 push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
1067 }
1068
1069 ### curl doesn't follow 302 (temporarily moved) etc automatically
1070 ### so we add --location to enable that.
6e654618
JB
1071 push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
1072
1073 ### with IPC::Cmd > 0.41, this is fixed in teh library,
1074 ### and there's no need for special casing any more.
1075 ### DO NOT quote things for IPC::Run, it breaks stuff.
1076 # $IPC::Cmd::USE_IPC_RUN
1077 # ? ($to, $self->uri)
1078 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1079
79fd8837
JB
1080
1081 my $captured;
1082 unless(run( command => $cmd,
1083 buffer => \$captured,
1084 verbose => $DEBUG )
1085 ) {
1086
1087 return $self->_error(loc("Command failed: %1", $captured || ''));
1088 }
1089
1090 return $to;
1091
1092 } else {
1093 $METHOD_FAIL->{'curl'} = 1;
1094 return;
1095 }
1096}
1097
1098
1099### use File::Copy for fetching file:// urls ###
9e5ea595
RGS
1100###
1101### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
1102### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
fe98d82b 1103###
9e5ea595 1104
79fd8837
JB
1105sub _file_fetch {
1106 my $self = shift;
1107 my %hash = @_;
1108
1109 my ($to);
1110 my $tmpl = {
1111 to => { required => 1, store => \$to }
1112 };
1113 check( $tmpl, \%hash ) or return;
1114
9e5ea595
RGS
1115
1116
79fd8837
JB
1117 ### prefix a / on unix systems with a file uri, since it would
1118 ### look somewhat like this:
9e5ea595
RGS
1119 ### file:///home/kane/file
1120 ### wheras windows file uris for 'c:\some\dir\file' might look like:
1121 ### file:///C:/some/dir/file
1122 ### file:///C|/some/dir/file
1123 ### or for a network share '\\host\share\some\dir\file':
1124 ### file:////host/share/some/dir/file
1125 ###
1126 ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1127 ### file://vms.host.edu/disk$user/my/notes/note12345.txt
1128 ###
1129
1130 my $path = $self->path;
1131 my $vol = $self->vol;
1132 my $share = $self->share;
1133
1134 my $remote;
1135 if (!$share and $self->host) {
1136 return $self->_error(loc(
1137 "Currently %1 cannot handle hosts in %2 urls",
1138 'File::Fetch', 'file://'
1139 ));
1140 }
1141
1142 if( $vol ) {
1143 $path = File::Spec->catdir( split /\//, $path );
1144 $remote = File::Spec->catpath( $vol, $path, $self->file);
79fd8837 1145
9e5ea595
RGS
1146 } elsif( $share ) {
1147 ### win32 specific, and a share name, so we wont bother with File::Spec
1148 $path =~ s|/+|\\|g;
1149 $remote = "\\\\".$self->host."\\$share\\$path";
1150
1151 } else {
5e6d05d2
RGS
1152 ### File::Spec on VMS can not currently handle UNIX syntax.
1153 my $file_class = ON_VMS
1154 ? 'File::Spec::Unix'
1155 : 'File::Spec';
1156
1157 $remote = $file_class->catfile( $path, $self->file );
9e5ea595 1158 }
79fd8837
JB
1159
1160 ### File::Copy is littered with 'die' statements :( ###
1161 my $rv = eval { File::Copy::copy( $remote, $to ) };
1162
1163 ### something went wrong ###
1164 if( !$rv or $@ ) {
1165 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1166 $remote, $to, $!, $@));
1167 }
1168
1169 return $to;
1170}
1171
1172### use /usr/bin/rsync to fetch files
1173sub _rsync_fetch {
1174 my $self = shift;
1175 my %hash = @_;
1176
1177 my ($to);
1178 my $tmpl = {
1179 to => { required => 1, store => \$to }
1180 };
1181 check( $tmpl, \%hash ) or return;
1182
1183 if (my $rsync = can_run('rsync')) {
1184
1185 my $cmd = [ $rsync ];
1186
1187 ### XXX: rsync has no I/O timeouts at all, by default
1188 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1189
1190 push(@$cmd, '--quiet') unless $DEBUG;
1191
d4b3706f 1192 ### DO NOT quote things for IPC::Run, it breaks stuff.
6e654618
JB
1193 push @$cmd, $self->uri, $to;
1194
1195 ### with IPC::Cmd > 0.41, this is fixed in teh library,
1196 ### and there's no need for special casing any more.
1197 ### DO NOT quote things for IPC::Run, it breaks stuff.
1198 # $IPC::Cmd::USE_IPC_RUN
1199 # ? ($to, $self->uri)
1200 # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
79fd8837
JB
1201
1202 my $captured;
1203 unless(run( command => $cmd,
1204 buffer => \$captured,
1205 verbose => $DEBUG )
1206 ) {
1207
fe98d82b
RGS
1208 return $self->_error(loc("Command %1 failed: %2",
1209 "@$cmd" || '', $captured || ''));
79fd8837
JB
1210 }
1211
1212 return $to;
1213
1214 } else {
1215 $METHOD_FAIL->{'rsync'} = 1;
1216 return;
1217 }
1218}
1219
1220#################################
1221#
1222# Error code
1223#
1224#################################
1225
1226=pod
1227
1228=head2 $ff->error([BOOL])
1229
1230Returns the last encountered error as string.
1231Pass it a true value to get the C<Carp::longmess()> output instead.
1232
1233=cut
1234
d4b3706f
RGS
1235### error handling the way Archive::Extract does it
1236sub _error {
1237 my $self = shift;
1238 my $error = shift;
1239
1240 $self->_error_msg( $error );
1241 $self->_error_msg_long( Carp::longmess($error) );
1242
1243 if( $WARN ) {
1244 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
79fd8837
JB
1245 }
1246
d4b3706f 1247 return;
79fd8837
JB
1248}
1249
d4b3706f
RGS
1250sub error {
1251 my $self = shift;
1252 return shift() ? $self->_error_msg_long : $self->_error_msg;
1253}
79fd8837
JB
1254
1255
12561;
1257
1258=pod
1259
1260=head1 HOW IT WORKS
1261
1262File::Fetch is able to fetch a variety of uris, by using several
1263external programs and modules.
1264
1265Below is a mapping of what utilities will be used in what order
1266for what schemes, if available:
1267
6e654618 1268 file => LWP, lftp, file
314f5584 1269 http => LWP, wget, curl, lftp, lynx, iosock
6e654618 1270 ftp => LWP, Net::FTP, wget, curl, lftp, ncftp, ftp
79fd8837
JB
1271 rsync => rsync
1272
1273If you'd like to disable the use of one or more of these utilities
1274and/or modules, see the C<$BLACKLIST> variable further down.
1275
1276If a utility or module isn't available, it will be marked in a cache
1277(see the C<$METHOD_FAIL> variable further down), so it will not be
1278tried again. The C<fetch> method will only fail when all options are
1279exhausted, and it was not able to retrieve the file.
1280
314f5584
CBW
1281C<iosock> is a very limited L<IO::Socket::INET> based mechanism for
1282retrieving C<http> schemed urls. It doesn't follow redirects for instance.
1283
79fd8837
JB
1284A special note about fetching files from an ftp uri:
1285
1286By default, all ftp connections are done in passive mode. To change
1287that, see the C<$FTP_PASSIVE> variable further down.
1288
1289Furthermore, ftp uris only support anonymous connections, so no
1290named user/password pair can be passed along.
1291
1292C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable
1293further down.
1294
1295=head1 GLOBAL VARIABLES
1296
1297The behaviour of File::Fetch can be altered by changing the following
1298global variables:
1299
1300=head2 $File::Fetch::FROM_EMAIL
1301
1302This is the email address that will be sent as your anonymous ftp
1303password.
1304
1305Default is C<File-Fetch@example.com>.
1306
1307=head2 $File::Fetch::USER_AGENT
1308
1309This is the useragent as C<LWP> will report it.
1310
1311Default is C<File::Fetch/$VERSION>.
1312
1313=head2 $File::Fetch::FTP_PASSIVE
1314
1315This variable controls whether the environment variable C<FTP_PASSIVE>
1316and any passive switches to commandline tools will be set to true.
1317
1318Default value is 1.
1319
1320Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch
1321files, since passive mode can only be set interactively for this binary
1322
1323=head2 $File::Fetch::TIMEOUT
1324
1325When set, controls the network timeout (counted in seconds).
1326
1327Default value is 0.
1328
1329=head2 $File::Fetch::WARN
1330
1331This variable controls whether errors encountered internally by
1332C<File::Fetch> should be C<carp>'d or not.
1333
1334Set to false to silence warnings. Inspect the output of the C<error()>
1335method manually to see what went wrong.
1336
1337Defaults to C<true>.
1338
1339=head2 $File::Fetch::DEBUG
1340
1341This enables debugging output when calling commandline utilities to
1342fetch files.
1343This also enables C<Carp::longmess> errors, instead of the regular
1344C<carp> errors.
1345
1346Good for tracking down why things don't work with your particular
1347setup.
1348
1349Default is 0.
1350
1351=head2 $File::Fetch::BLACKLIST
1352
1353This is an array ref holding blacklisted modules/utilities for fetching
1354files with.
1355
1356To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could
1357set $File::Fetch::BLACKLIST to:
1358
1359 $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1360
1361The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable.
1362
1363See the note on C<MAPPING> below.
1364
1365=head2 $File::Fetch::METHOD_FAIL
1366
1367This is a hashref registering what modules/utilities were known to fail
1368for fetching files (mostly because they weren't installed).
1369
1370You can reset this cache by assigning an empty hashref to it, or
1371individually remove keys.
1372
1373See the note on C<MAPPING> below.
1374
1375=head1 MAPPING
1376
1377
1378Here's a quick mapping for the utilities/modules, and their names for
1379the $BLACKLIST, $METHOD_FAIL and other internal functions.
1380
1381 LWP => lwp
1382 Net::FTP => netftp
1383 wget => wget
1384 lynx => lynx
1385 ncftp => ncftp
1386 ftp => ftp
1387 curl => curl
1388 rsync => rsync
6e654618 1389 lftp => lftp
314f5584 1390 IO::Socket => iosock
79fd8837
JB
1391
1392=head1 FREQUENTLY ASKED QUESTIONS
1393
1394=head2 So how do I use a proxy with File::Fetch?
1395
1396C<File::Fetch> currently only supports proxies with LWP::UserAgent.
1397You will need to set your environment variables accordingly. For
1398example, to use an ftp proxy:
1399
1400 $ENV{ftp_proxy} = 'foo.com';
1401
1402Refer to the LWP::UserAgent manpage for more details.
1403
1404=head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1405
1406C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>,
1407which we in turn capture. If that content is a 'custom' error file
1408(like, say, a C<404 handler>), you will get that contents instead.
1409
1410Sadly, C<lynx> doesn't support any options to return a different exit
1411code on non-C<200 OK> status, giving us no way to tell the difference
1412between a 'successfull' fetch and a custom error page.
1413
1414Therefor, we recommend to only use C<lynx> as a last resort. This is
1415why it is at the back of our list of methods to try as well.
1416
d4b3706f
RGS
1417=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1418
1419C<File::Fetch> is relatively smart about things. When trying to write
1420a file to disk, it removes the C<query parameters> (see the
1421C<output_file> method for details) from the file name before creating
1422it. In most cases this suffices.
1423
1424If you have any other characters you need to escape, please install
1425the C<URI::Escape> module from CPAN, and pre-encode your URI before
1426passing it to C<File::Fetch>. You can read about the details of URIs
1427and URI encoding here:
1428
1429 http://www.faqs.org/rfcs/rfc2396.html
1430
79fd8837
JB
1431=head1 TODO
1432
1433=over 4
1434
1435=item Implement $PREFER_BIN
1436
1437To indicate to rather use commandline tools than modules
1438
a0ad4830
JB
1439=back
1440
1441=head1 BUG REPORTS
1442
1443Please report bugs or other issues to E<lt>bug-file-fetch@rt.cpan.org<gt>.
1444
1445=head1 AUTHOR
79fd8837 1446
d4b3706f 1447This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
79fd8837
JB
1448
1449=head1 COPYRIGHT
1450
a0ad4830
JB
1451This library is free software; you may redistribute and/or modify it
1452under the same terms as Perl itself.
79fd8837 1453
79fd8837
JB
1454
1455=cut
1456
1457# Local variables:
1458# c-indentation-style: bsd
1459# c-basic-offset: 4
1460# indent-tabs-mode: nil
1461# End:
1462# vim: expandtab shiftwidth=4:
1463
1464
1465
1466