Commit | Line | Data |
---|---|---|
79fd8837 JB |
1 | package File::Fetch; |
2 | ||
3 | use strict; | |
4 | use FileHandle; | |
5 | use File::Copy; | |
6 | use File::Spec; | |
7 | use File::Spec::Unix; | |
8 | use File::Fetch::Item; | |
9 | use File::Basename qw[dirname]; | |
10 | ||
11 | use Cwd qw[cwd]; | |
12 | use Carp qw[carp]; | |
13 | use IPC::Cmd qw[can_run run]; | |
14 | use File::Path qw[mkpath]; | |
15 | use Params::Check qw[check]; | |
16 | use Module::Load::Conditional qw[can_load]; | |
17 | use Locale::Maketext::Simple Style => 'gettext'; | |
18 | ||
19 | use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT | |
20 | $BLACKLIST $METHOD_FAIL $VERSION $METHODS | |
21 | $FTP_PASSIVE $TIMEOUT $DEBUG $WARN | |
22 | ]; | |
23 | ||
24 | $VERSION = 0.08; | |
25 | $PREFER_BIN = 0; # XXX TODO implement | |
26 | $FROM_EMAIL = 'File-Fetch@example.com'; | |
27 | $USER_AGENT = 'File::Fetch/$VERSION'; | |
28 | $BLACKLIST = [qw|ftp|]; | |
29 | $METHOD_FAIL = { }; | |
30 | $FTP_PASSIVE = 1; | |
31 | $TIMEOUT = 0; | |
32 | $DEBUG = 0; | |
33 | $WARN = 1; | |
34 | ||
35 | ### methods available to fetch the file depending on the scheme | |
36 | $METHODS = { | |
37 | http => [ qw|lwp wget curl lynx| ], | |
38 | ftp => [ qw|lwp netftp wget curl ncftp ftp| ], | |
39 | file => [ qw|lwp file| ], | |
40 | rsync => [ qw|rsync| ] | |
41 | }; | |
42 | ||
43 | ### silly warnings ### | |
44 | local $Params::Check::VERBOSE = 1; | |
45 | local $Params::Check::VERBOSE = 1; | |
46 | local $Module::Load::Conditional::VERBOSE = 0; | |
47 | local $Module::Load::Conditional::VERBOSE = 0; | |
48 | ||
49 | ### see what OS we are on, important for file:// uris ### | |
50 | use constant ON_UNIX => ($^O ne 'MSWin32' and | |
51 | $^O ne 'MacOS' and | |
52 | $^O ne 'VMS'); | |
53 | ||
54 | =pod | |
55 | ||
56 | =head1 NAME | |
57 | ||
58 | File::Fetch - A generic file fetching mechanism | |
59 | ||
60 | =head1 SYNOPSIS | |
61 | ||
62 | use File::Fetch; | |
63 | ||
64 | ### build a File::Fetch object ### | |
65 | my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt'); | |
66 | ||
67 | ### fetch the uri to cwd() ### | |
68 | my $where = $ff->fetch() or die $ff->error; | |
69 | ||
70 | ### fetch the uri to /tmp ### | |
71 | my $where = $ff->fetch( to => '/tmp' ); | |
72 | ||
73 | ### parsed bits from the uri ### | |
74 | $ff->uri; | |
75 | $ff->scheme; | |
76 | $ff->host; | |
77 | $ff->path; | |
78 | $ff->file; | |
79 | ||
80 | =head1 DESCRIPTION | |
81 | ||
82 | File::Fetch is a generic file fetching mechanism. | |
83 | ||
84 | It allows you to fetch any file pointed to by a C<ftp>, C<http>, | |
85 | C<file>, or C<rsync> uri by a number of different means. | |
86 | ||
87 | See the C<HOW IT WORKS> section further down for details. | |
88 | ||
89 | =head1 METHODS | |
90 | ||
91 | =head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' ); | |
92 | ||
93 | Parses the uri and creates a corresponding File::Fetch::Item object, | |
94 | that is ready to be C<fetch>ed and returns it. | |
95 | ||
96 | Returns false on failure. | |
97 | ||
98 | =cut | |
99 | ||
100 | sub new { | |
101 | my $class = shift; | |
102 | my %hash = @_; | |
103 | ||
104 | my ($uri); | |
105 | my $tmpl = { | |
106 | uri => { required => 1, store => \$uri }, | |
107 | }; | |
108 | ||
109 | check( $tmpl, \%hash ) or return; | |
110 | ||
111 | ### parse the uri to usable parts ### | |
112 | my $href = __PACKAGE__->_parse_uri( $uri ) or return; | |
113 | ||
114 | ### make it into a FFI object ### | |
115 | my $ffi = File::Fetch::Item->new( %$href ) or return; | |
116 | ||
117 | ||
118 | ### return the object ### | |
119 | return $ffi; | |
120 | } | |
121 | ||
122 | ### parses an uri to a hash structure: | |
123 | ### | |
124 | ### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' ) | |
125 | ### | |
126 | ### becomes: | |
127 | ### | |
128 | ### $href = { | |
129 | ### scheme => 'ftp', | |
130 | ### host => 'ftp.cpan.org', | |
131 | ### path => '/pub/mirror', | |
132 | ### file => 'index.html' | |
133 | ### }; | |
134 | ### | |
135 | sub _parse_uri { | |
136 | my $self = shift; | |
137 | my $uri = shift or return; | |
138 | ||
139 | my $href = { uri => $uri }; | |
140 | ||
141 | ### find the scheme ### | |
142 | $uri =~ s|^(\w+)://||; | |
143 | $href->{scheme} = $1; | |
144 | ||
145 | ### file:// paths have no host ### | |
146 | if( $href->{scheme} eq 'file' ) { | |
147 | $href->{path} = $uri; | |
148 | $href->{host} = ''; | |
149 | ||
150 | } else { | |
151 | @{$href}{qw|host path|} = $uri =~ m|([^/]*)(/.*)$|s; | |
152 | } | |
153 | ||
154 | ### split the path into file + dir ### | |
155 | { my @parts = File::Spec::Unix->splitpath( delete $href->{path} ); | |
156 | $href->{path} = $parts[1]; | |
157 | $href->{file} = $parts[2]; | |
158 | } | |
159 | ||
160 | ||
161 | return $href; | |
162 | } | |
163 | ||
164 | =head2 $ff->fetch( [to => /my/output/dir/] ) | |
165 | ||
166 | Fetches the file you requested. By default it writes to C<cwd()>, | |
167 | but you can override that by specifying the C<to> argument. | |
168 | ||
169 | Returns the full path to the downloaded file on success, and false | |
170 | on failure. | |
171 | ||
172 | =cut | |
173 | ||
174 | sub fetch { | |
175 | my $self = shift or return; | |
176 | my %hash = @_; | |
177 | ||
178 | my $to; | |
179 | my $tmpl = { | |
180 | to => { default => cwd(), store => \$to }, | |
181 | }; | |
182 | ||
183 | check( $tmpl, \%hash ) or return; | |
184 | ||
185 | ### create the path if it doesn't exist yet ### | |
186 | unless( -d $to ) { | |
187 | eval { mkpath( $to ) }; | |
188 | ||
189 | return $self->_error(loc("Could not create path '%1'",$to)) if $@; | |
190 | } | |
191 | ||
192 | ### set passive ftp if required ### | |
193 | local $ENV{FTP_PASSIVE} = $FTP_PASSIVE; | |
194 | ||
195 | ### | |
196 | for my $method ( @{ $METHODS->{$self->scheme} } ) { | |
197 | my $sub = '_'.$method.'_fetch'; | |
198 | ||
199 | unless( __PACKAGE__->can($sub) ) { | |
200 | $self->_error(loc("Cannot call method for '%1' -- WEIRD!", | |
201 | $method)); | |
202 | next; | |
203 | } | |
204 | ||
205 | ### method is blacklisted ### | |
206 | next if grep { lc $_ eq $method } @$BLACKLIST; | |
207 | ||
208 | ### method is known to fail ### | |
209 | next if $METHOD_FAIL->{$method}; | |
210 | ||
211 | if(my $file = $self->$sub(to=>File::Spec->catfile($to,$self->file))){ | |
212 | ||
213 | unless( -e $file && -s _ ) { | |
214 | $self->_error(loc("'%1' said it fetched '%2', ". | |
215 | "but it was not created",$method,$file)); | |
216 | ||
217 | ### mark the failure ### | |
218 | $METHOD_FAIL->{$method} = 1; | |
219 | ||
220 | next; | |
221 | ||
222 | } else { | |
223 | ||
224 | my $abs = File::Spec->rel2abs( $file ); | |
225 | return $abs; | |
226 | } | |
227 | } | |
228 | } | |
229 | ||
230 | ||
231 | ### if we got here, we looped over all methods, but we weren't able | |
232 | ### to fetch it. | |
233 | return; | |
234 | } | |
235 | ||
236 | =head1 ACCESSORS | |
237 | ||
238 | A C<File::Fetch> object has the following accessors | |
239 | ||
240 | =over 4 | |
241 | ||
242 | =item $ff->uri | |
243 | ||
244 | The uri you passed to the constructor | |
245 | ||
246 | =item $ff->scheme | |
247 | ||
248 | The scheme from the uri (like 'file', 'http', etc) | |
249 | ||
250 | =item $ff->host | |
251 | ||
252 | The hostname in the uri, will be empty for a 'file' scheme. | |
253 | ||
254 | =item $ff->path | |
255 | ||
256 | The path from the uri, will be at least a single '/'. | |
257 | ||
258 | =item $ff->file | |
259 | ||
260 | The name of the remote file. Will be used as the name for the local | |
261 | file as well. | |
262 | ||
263 | =back | |
264 | ||
265 | =cut | |
266 | ||
267 | ######################## | |
268 | ### _*_fetch methods ### | |
269 | ######################## | |
270 | ||
271 | ### LWP fetching ### | |
272 | sub _lwp_fetch { | |
273 | my $self = shift; | |
274 | my %hash = @_; | |
275 | ||
276 | my ($to); | |
277 | my $tmpl = { | |
278 | to => { required => 1, store => \$to } | |
279 | }; | |
280 | check( $tmpl, \%hash ) or return; | |
281 | ||
282 | ### modules required to download with lwp ### | |
283 | my $use_list = { | |
284 | LWP => '0.0', | |
285 | 'LWP::UserAgent' => '0.0', | |
286 | 'HTTP::Request' => '0.0', | |
287 | 'HTTP::Status' => '0.0', | |
288 | URI => '0.0', | |
289 | ||
290 | }; | |
291 | ||
292 | if( can_load(modules => $use_list) ) { | |
293 | ||
294 | ### setup the uri object | |
295 | my $uri = URI->new( File::Spec::Unix->catfile( | |
296 | $self->path, $self->file | |
297 | ) ); | |
298 | ||
299 | ### special rules apply for file:// uris ### | |
300 | $uri->scheme( $self->scheme ); | |
301 | $uri->host( $self->scheme eq 'file' ? '' : $self->host ); | |
302 | $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file'; | |
303 | ||
304 | ### set up the useragent object | |
305 | my $ua = LWP::UserAgent->new(); | |
306 | $ua->timeout( $TIMEOUT ) if $TIMEOUT; | |
307 | $ua->agent( $USER_AGENT ); | |
308 | $ua->from( $FROM_EMAIL ); | |
309 | $ua->env_proxy; | |
310 | ||
311 | my $res = $ua->mirror($uri, $to) or return; | |
312 | ||
313 | ### uptodate or fetched ok ### | |
314 | if ( $res->code == 304 or $res->code == 200 ) { | |
315 | return $to; | |
316 | ||
317 | } else { | |
318 | return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]", | |
319 | $res->code, HTTP::Status::status_message($res->code), | |
320 | $res->status_line)); | |
321 | } | |
322 | ||
323 | } else { | |
324 | $METHOD_FAIL->{'lwp'} = 1; | |
325 | return; | |
326 | } | |
327 | } | |
328 | ||
329 | ### Net::FTP fetching | |
330 | sub _netftp_fetch { | |
331 | my $self = shift; | |
332 | my %hash = @_; | |
333 | ||
334 | my ($to); | |
335 | my $tmpl = { | |
336 | to => { required => 1, store => \$to } | |
337 | }; | |
338 | check( $tmpl, \%hash ) or return; | |
339 | ||
340 | ### required modules ### | |
341 | my $use_list = { 'Net::FTP' => 0 }; | |
342 | ||
343 | if( can_load( modules => $use_list ) ) { | |
344 | ||
345 | ### make connection ### | |
346 | my $ftp; | |
347 | my @options = ($self->host); | |
348 | push(@options, Timeout => $TIMEOUT) if $TIMEOUT; | |
349 | unless( $ftp = Net::FTP->new( @options ) ) { | |
350 | return $self->_error(loc("Ftp creation failed: %1",$@)); | |
351 | } | |
352 | ||
353 | ### login ### | |
354 | unless( $ftp->login( anonymous => $FROM_EMAIL ) ) { | |
355 | return $self->_error(loc("Could not login to '%1'",$self->host)); | |
356 | } | |
357 | ||
358 | ### set binary mode, just in case ### | |
359 | $ftp->binary; | |
360 | ||
361 | ### create the remote path | |
362 | ### remember remote paths are unix paths! [#11483] | |
363 | my $remote = File::Spec::Unix->catfile( $self->path, $self->file ); | |
364 | ||
365 | ### fetch the file ### | |
366 | my $target; | |
367 | unless( $target = $ftp->get( $remote, $to ) ) { | |
368 | return $self->_error(loc("Could not fetch '%1' from '%2'", | |
369 | $remote, $self->host)); | |
370 | } | |
371 | ||
372 | ### log out ### | |
373 | $ftp->quit; | |
374 | ||
375 | return $target; | |
376 | ||
377 | } else { | |
378 | $METHOD_FAIL->{'netftp'} = 1; | |
379 | return; | |
380 | } | |
381 | } | |
382 | ||
383 | ### /bin/wget fetch ### | |
384 | sub _wget_fetch { | |
385 | my $self = shift; | |
386 | my %hash = @_; | |
387 | ||
388 | my ($to); | |
389 | my $tmpl = { | |
390 | to => { required => 1, store => \$to } | |
391 | }; | |
392 | check( $tmpl, \%hash ) or return; | |
393 | ||
394 | ### see if we have a wget binary ### | |
395 | if( my $wget = can_run('wget') ) { | |
396 | ||
397 | ### no verboseness, thanks ### | |
398 | my $cmd = [ $wget, '--quiet' ]; | |
399 | ||
400 | ### if a timeout is set, add it ### | |
401 | push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; | |
402 | ||
403 | ### run passive if specified ### | |
404 | push @$cmd, '--passive-ftp' if $FTP_PASSIVE; | |
405 | ||
406 | ### set the output document, add the uri ### | |
407 | push @$cmd, '--output-document', $to, $self->uri; | |
408 | ||
409 | ### shell out ### | |
410 | my $captured; | |
411 | unless( run( command => $cmd, buffer => \$captured, verbose => 0 ) ) { | |
412 | ### wget creates the output document always, even if the fetch | |
413 | ### fails.. so unlink it in that case | |
414 | 1 while unlink $to; | |
415 | ||
416 | return $self->_error(loc( "Command failed: %1", $captured || '' )); | |
417 | } | |
418 | ||
419 | return $to; | |
420 | ||
421 | } else { | |
422 | $METHOD_FAIL->{'wget'} = 1; | |
423 | return; | |
424 | } | |
425 | } | |
426 | ||
427 | ||
428 | ### /bin/ftp fetch ### | |
429 | sub _ftp_fetch { | |
430 | my $self = shift; | |
431 | my %hash = @_; | |
432 | ||
433 | my ($to); | |
434 | my $tmpl = { | |
435 | to => { required => 1, store => \$to } | |
436 | }; | |
437 | check( $tmpl, \%hash ) or return; | |
438 | ||
439 | ### see if we have a wget binary ### | |
440 | if( my $ftp = can_run('ftp') ) { | |
441 | ||
442 | my $fh = FileHandle->new; | |
443 | ||
444 | local $SIG{CHLD} = 'IGNORE'; | |
445 | ||
446 | unless ($fh->open("|$ftp -n")) { | |
447 | return $self->_error(loc("%1 creation failed: %2", $ftp, $!)); | |
448 | } | |
449 | ||
450 | my @dialog = ( | |
451 | "lcd " . dirname($to), | |
452 | "open " . $self->host, | |
453 | "user anonymous $FROM_EMAIL", | |
454 | "cd /", | |
455 | "cd " . $self->path, | |
456 | "binary", | |
457 | "get " . $self->file . " " . $self->file, | |
458 | "quit", | |
459 | ); | |
460 | ||
461 | foreach (@dialog) { $fh->print($_, "\n") } | |
462 | $fh->close or return; | |
463 | ||
464 | return $to; | |
465 | } | |
466 | } | |
467 | ||
468 | ### lynx is stupid - it decompresses any .gz file it finds to be text | |
469 | ### use /bin/lynx to fetch files | |
470 | sub _lynx_fetch { | |
471 | my $self = shift; | |
472 | my %hash = @_; | |
473 | ||
474 | my ($to); | |
475 | my $tmpl = { | |
476 | to => { required => 1, store => \$to } | |
477 | }; | |
478 | check( $tmpl, \%hash ) or return; | |
479 | ||
480 | ### see if we have a wget binary ### | |
481 | if( my $lynx = can_run('lynx') ) { | |
482 | ||
483 | ||
484 | ### write to the output file ourselves, since lynx ass_u_mes to much | |
485 | my $local = FileHandle->new(">$to") | |
486 | or return $self->_error(loc( | |
487 | "Could not open '%1' for writing: %2",$to,$!)); | |
488 | ||
489 | ### dump to stdout ### | |
490 | my $cmd = [ | |
491 | $lynx, | |
492 | '-source', | |
493 | "-auth=anonymous:$FROM_EMAIL", | |
494 | ]; | |
495 | ||
496 | push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; | |
497 | ||
498 | push @$cmd, $self->uri; | |
499 | ||
500 | ### shell out ### | |
501 | my $captured; | |
502 | unless(run( command => $cmd, | |
503 | buffer => \$captured, | |
504 | verbose => $DEBUG ) | |
505 | ) { | |
506 | return $self->_error(loc("Command failed: %1", $captured || '')); | |
507 | } | |
508 | ||
509 | ### print to local file ### | |
510 | ### XXX on a 404 with a special error page, $captured will actually | |
511 | ### hold the contents of that page, and make it *appear* like the | |
512 | ### request was a success, when really it wasn't :( | |
513 | ### there doesn't seem to be an option for lynx to change the exit | |
514 | ### code based on a 4XX status or so. | |
515 | ### the closest we can come is using --error_file and parsing that, | |
516 | ### which is very unreliable ;( | |
517 | $local->print( $captured ); | |
518 | $local->close or return; | |
519 | ||
520 | return $to; | |
521 | ||
522 | } else { | |
523 | $METHOD_FAIL->{'lynx'} = 1; | |
524 | return; | |
525 | } | |
526 | } | |
527 | ||
528 | ### use /bin/ncftp to fetch files | |
529 | sub _ncftp_fetch { | |
530 | my $self = shift; | |
531 | my %hash = @_; | |
532 | ||
533 | my ($to); | |
534 | my $tmpl = { | |
535 | to => { required => 1, store => \$to } | |
536 | }; | |
537 | check( $tmpl, \%hash ) or return; | |
538 | ||
539 | ### we can only set passive mode in interactive sesssions, so bail out | |
540 | ### if $FTP_PASSIVE is set | |
541 | return if $FTP_PASSIVE; | |
542 | ||
543 | ### see if we have a wget binary ### | |
544 | if( my $ncftp = can_run('ncftp') ) { | |
545 | ||
546 | my $cmd = [ | |
547 | $ncftp, | |
548 | '-V', # do not be verbose | |
549 | '-p', $FROM_EMAIL, # email as password | |
550 | $self->host, # hostname | |
551 | dirname($to), # local dir for the file | |
552 | # remote path to the file | |
553 | File::Spec::Unix->catdir( $self->path, $self->file ), | |
554 | ]; | |
555 | ||
556 | ### shell out ### | |
557 | my $captured; | |
558 | unless(run( command => $cmd, | |
559 | buffer => \$captured, | |
560 | verbose => $DEBUG ) | |
561 | ) { | |
562 | return $self->_error(loc("Command failed: %1", $captured || '')); | |
563 | } | |
564 | ||
565 | return $to; | |
566 | ||
567 | } else { | |
568 | $METHOD_FAIL->{'ncftp'} = 1; | |
569 | return; | |
570 | } | |
571 | } | |
572 | ||
573 | ### use /bin/curl to fetch files | |
574 | sub _curl_fetch { | |
575 | my $self = shift; | |
576 | my %hash = @_; | |
577 | ||
578 | my ($to); | |
579 | my $tmpl = { | |
580 | to => { required => 1, store => \$to } | |
581 | }; | |
582 | check( $tmpl, \%hash ) or return; | |
583 | ||
584 | if (my $curl = can_run('curl')) { | |
585 | ||
586 | ### these long opts are self explanatory - I like that -jmb | |
587 | my $cmd = [ $curl ]; | |
588 | ||
589 | push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT; | |
590 | ||
591 | push(@$cmd, '--silent') unless $DEBUG; | |
592 | ||
593 | ### curl does the right thing with passive, regardless ### | |
594 | if ($self->scheme eq 'ftp') { | |
595 | push(@$cmd, '--user', "anonymous:$FROM_EMAIL"); | |
596 | } | |
597 | ||
598 | ### curl doesn't follow 302 (temporarily moved) etc automatically | |
599 | ### so we add --location to enable that. | |
600 | push @$cmd, '--fail', '--location', '--output', $to, $self->uri; | |
601 | ||
602 | my $captured; | |
603 | unless(run( command => $cmd, | |
604 | buffer => \$captured, | |
605 | verbose => $DEBUG ) | |
606 | ) { | |
607 | ||
608 | return $self->_error(loc("Command failed: %1", $captured || '')); | |
609 | } | |
610 | ||
611 | return $to; | |
612 | ||
613 | } else { | |
614 | $METHOD_FAIL->{'curl'} = 1; | |
615 | return; | |
616 | } | |
617 | } | |
618 | ||
619 | ||
620 | ### use File::Copy for fetching file:// urls ### | |
621 | ### XXX file:// uri to local path conversion is just too weird... | |
622 | ### depend on LWP to do it for us | |
623 | sub _file_fetch { | |
624 | my $self = shift; | |
625 | my %hash = @_; | |
626 | ||
627 | my ($to); | |
628 | my $tmpl = { | |
629 | to => { required => 1, store => \$to } | |
630 | }; | |
631 | check( $tmpl, \%hash ) or return; | |
632 | ||
633 | ### prefix a / on unix systems with a file uri, since it would | |
634 | ### look somewhat like this: | |
635 | ### file://home/kane/file | |
636 | ### wheras windows file uris might look like: | |
637 | ### file://C:/home/kane/file | |
638 | my $path = ON_UNIX ? '/'. $self->path : $self->path; | |
639 | ||
640 | my $remote = File::Spec->catfile( $path, $self->file ); | |
641 | ||
642 | ### File::Copy is littered with 'die' statements :( ### | |
643 | my $rv = eval { File::Copy::copy( $remote, $to ) }; | |
644 | ||
645 | ### something went wrong ### | |
646 | if( !$rv or $@ ) { | |
647 | return $self->_error(loc("Could not copy '%1' to '%2': %3 %4", | |
648 | $remote, $to, $!, $@)); | |
649 | } | |
650 | ||
651 | return $to; | |
652 | } | |
653 | ||
654 | ### use /usr/bin/rsync to fetch files | |
655 | sub _rsync_fetch { | |
656 | my $self = shift; | |
657 | my %hash = @_; | |
658 | ||
659 | my ($to); | |
660 | my $tmpl = { | |
661 | to => { required => 1, store => \$to } | |
662 | }; | |
663 | check( $tmpl, \%hash ) or return; | |
664 | ||
665 | if (my $rsync = can_run('rsync')) { | |
666 | ||
667 | my $cmd = [ $rsync ]; | |
668 | ||
669 | ### XXX: rsync has no I/O timeouts at all, by default | |
670 | push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; | |
671 | ||
672 | push(@$cmd, '--quiet') unless $DEBUG; | |
673 | ||
674 | push @$cmd, $self->uri, $to; | |
675 | ||
676 | my $captured; | |
677 | unless(run( command => $cmd, | |
678 | buffer => \$captured, | |
679 | verbose => $DEBUG ) | |
680 | ) { | |
681 | ||
682 | return $self->_error(loc("Command failed: %1", $captured || '')); | |
683 | } | |
684 | ||
685 | return $to; | |
686 | ||
687 | } else { | |
688 | $METHOD_FAIL->{'rsync'} = 1; | |
689 | return; | |
690 | } | |
691 | } | |
692 | ||
693 | ################################# | |
694 | # | |
695 | # Error code | |
696 | # | |
697 | ################################# | |
698 | ||
699 | =pod | |
700 | ||
701 | =head2 $ff->error([BOOL]) | |
702 | ||
703 | Returns the last encountered error as string. | |
704 | Pass it a true value to get the C<Carp::longmess()> output instead. | |
705 | ||
706 | =cut | |
707 | ||
708 | ### Error handling, the way Archive::Tar does it ### | |
709 | { | |
710 | my $error = ''; | |
711 | my $longmess = ''; | |
712 | ||
713 | sub _error { | |
714 | my $self = shift; | |
715 | $error = shift; | |
716 | $longmess = Carp::longmess($error); | |
717 | ||
718 | ### set Archive::Tar::WARN to 0 to disable printing | |
719 | ### of errors | |
720 | if( $WARN ) { | |
721 | carp $DEBUG ? $longmess : $error; | |
722 | } | |
723 | ||
724 | return; | |
725 | } | |
726 | ||
727 | sub error { | |
728 | my $self = shift; | |
729 | return shift() ? $longmess : $error; | |
730 | } | |
731 | } | |
732 | ||
733 | ||
734 | ||
735 | 1; | |
736 | ||
737 | =pod | |
738 | ||
739 | =head1 HOW IT WORKS | |
740 | ||
741 | File::Fetch is able to fetch a variety of uris, by using several | |
742 | external programs and modules. | |
743 | ||
744 | Below is a mapping of what utilities will be used in what order | |
745 | for what schemes, if available: | |
746 | ||
747 | file => LWP, file | |
748 | http => LWP, wget, curl, lynx | |
749 | ftp => LWP, Net::FTP, wget, curl, ncftp, ftp | |
750 | rsync => rsync | |
751 | ||
752 | If you'd like to disable the use of one or more of these utilities | |
753 | and/or modules, see the C<$BLACKLIST> variable further down. | |
754 | ||
755 | If a utility or module isn't available, it will be marked in a cache | |
756 | (see the C<$METHOD_FAIL> variable further down), so it will not be | |
757 | tried again. The C<fetch> method will only fail when all options are | |
758 | exhausted, and it was not able to retrieve the file. | |
759 | ||
760 | A special note about fetching files from an ftp uri: | |
761 | ||
762 | By default, all ftp connections are done in passive mode. To change | |
763 | that, see the C<$FTP_PASSIVE> variable further down. | |
764 | ||
765 | Furthermore, ftp uris only support anonymous connections, so no | |
766 | named user/password pair can be passed along. | |
767 | ||
768 | C</bin/ftp> is blacklisted by default; see the C<$BLACKLIST> variable | |
769 | further down. | |
770 | ||
771 | =head1 GLOBAL VARIABLES | |
772 | ||
773 | The behaviour of File::Fetch can be altered by changing the following | |
774 | global variables: | |
775 | ||
776 | =head2 $File::Fetch::FROM_EMAIL | |
777 | ||
778 | This is the email address that will be sent as your anonymous ftp | |
779 | password. | |
780 | ||
781 | Default is C<File-Fetch@example.com>. | |
782 | ||
783 | =head2 $File::Fetch::USER_AGENT | |
784 | ||
785 | This is the useragent as C<LWP> will report it. | |
786 | ||
787 | Default is C<File::Fetch/$VERSION>. | |
788 | ||
789 | =head2 $File::Fetch::FTP_PASSIVE | |
790 | ||
791 | This variable controls whether the environment variable C<FTP_PASSIVE> | |
792 | and any passive switches to commandline tools will be set to true. | |
793 | ||
794 | Default value is 1. | |
795 | ||
796 | Note: When $FTP_PASSIVE is true, C<ncftp> will not be used to fetch | |
797 | files, since passive mode can only be set interactively for this binary | |
798 | ||
799 | =head2 $File::Fetch::TIMEOUT | |
800 | ||
801 | When set, controls the network timeout (counted in seconds). | |
802 | ||
803 | Default value is 0. | |
804 | ||
805 | =head2 $File::Fetch::WARN | |
806 | ||
807 | This variable controls whether errors encountered internally by | |
808 | C<File::Fetch> should be C<carp>'d or not. | |
809 | ||
810 | Set to false to silence warnings. Inspect the output of the C<error()> | |
811 | method manually to see what went wrong. | |
812 | ||
813 | Defaults to C<true>. | |
814 | ||
815 | =head2 $File::Fetch::DEBUG | |
816 | ||
817 | This enables debugging output when calling commandline utilities to | |
818 | fetch files. | |
819 | This also enables C<Carp::longmess> errors, instead of the regular | |
820 | C<carp> errors. | |
821 | ||
822 | Good for tracking down why things don't work with your particular | |
823 | setup. | |
824 | ||
825 | Default is 0. | |
826 | ||
827 | =head2 $File::Fetch::BLACKLIST | |
828 | ||
829 | This is an array ref holding blacklisted modules/utilities for fetching | |
830 | files with. | |
831 | ||
832 | To disallow the use of, for example, C<LWP> and C<Net::FTP>, you could | |
833 | set $File::Fetch::BLACKLIST to: | |
834 | ||
835 | $File::Fetch::BLACKLIST = [qw|lwp netftp|] | |
836 | ||
837 | The default blacklist is [qw|ftp|], as C</bin/ftp> is rather unreliable. | |
838 | ||
839 | See the note on C<MAPPING> below. | |
840 | ||
841 | =head2 $File::Fetch::METHOD_FAIL | |
842 | ||
843 | This is a hashref registering what modules/utilities were known to fail | |
844 | for fetching files (mostly because they weren't installed). | |
845 | ||
846 | You can reset this cache by assigning an empty hashref to it, or | |
847 | individually remove keys. | |
848 | ||
849 | See the note on C<MAPPING> below. | |
850 | ||
851 | =head1 MAPPING | |
852 | ||
853 | ||
854 | Here's a quick mapping for the utilities/modules, and their names for | |
855 | the $BLACKLIST, $METHOD_FAIL and other internal functions. | |
856 | ||
857 | LWP => lwp | |
858 | Net::FTP => netftp | |
859 | wget => wget | |
860 | lynx => lynx | |
861 | ncftp => ncftp | |
862 | ftp => ftp | |
863 | curl => curl | |
864 | rsync => rsync | |
865 | ||
866 | =head1 FREQUENTLY ASKED QUESTIONS | |
867 | ||
868 | =head2 So how do I use a proxy with File::Fetch? | |
869 | ||
870 | C<File::Fetch> currently only supports proxies with LWP::UserAgent. | |
871 | You will need to set your environment variables accordingly. For | |
872 | example, to use an ftp proxy: | |
873 | ||
874 | $ENV{ftp_proxy} = 'foo.com'; | |
875 | ||
876 | Refer to the LWP::UserAgent manpage for more details. | |
877 | ||
878 | =head2 I used 'lynx' to fetch a file, but its contents is all wrong! | |
879 | ||
880 | C<lynx> can only fetch remote files by dumping its contents to C<STDOUT>, | |
881 | which we in turn capture. If that content is a 'custom' error file | |
882 | (like, say, a C<404 handler>), you will get that contents instead. | |
883 | ||
884 | Sadly, C<lynx> doesn't support any options to return a different exit | |
885 | code on non-C<200 OK> status, giving us no way to tell the difference | |
886 | between a 'successfull' fetch and a custom error page. | |
887 | ||
888 | Therefor, we recommend to only use C<lynx> as a last resort. This is | |
889 | why it is at the back of our list of methods to try as well. | |
890 | ||
891 | =head1 TODO | |
892 | ||
893 | =over 4 | |
894 | ||
895 | =item Implement $PREFER_BIN | |
896 | ||
897 | To indicate to rather use commandline tools than modules | |
898 | ||
899 | =head1 AUTHORS | |
900 | ||
901 | This module by | |
902 | Jos Boumans E<lt>kane@cpan.orgE<gt>. | |
903 | ||
904 | =head1 COPYRIGHT | |
905 | ||
906 | This module is | |
907 | copyright (c) 2003 Jos Boumans E<lt>kane@cpan.orgE<gt>. | |
908 | All rights reserved. | |
909 | ||
910 | This library is free software; | |
911 | you may redistribute and/or modify it under the same | |
912 | terms as Perl itself. | |
913 | ||
914 | =cut | |
915 | ||
916 | # Local variables: | |
917 | # c-indentation-style: bsd | |
918 | # c-basic-offset: 4 | |
919 | # indent-tabs-mode: nil | |
920 | # End: | |
921 | # vim: expandtab shiftwidth=4: | |
922 | ||
923 | ||
924 | ||
925 |