This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Silence ill-behaved or failing Module::Build tests on VMS.
[perl5.git] / lib / Archive / Extract.pm
CommitLineData
520c99e2
JB
1package Archive::Extract;
2
3use strict;
4
5use Cwd qw[cwd];
6use Carp qw[carp];
7use IPC::Cmd qw[run can_run];
8use FileHandle;
9use File::Path qw[mkpath];
10use File::Spec;
11use File::Basename qw[dirname basename];
12use Params::Check qw[check];
13use Module::Load::Conditional qw[can_load check_install];
14use Locale::Maketext::Simple Style => 'gettext';
15
16### solaris has silly /bin/tar output ###
17use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
18use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
19
4f3b9739
JM
20### VMS may require quoting upper case command options
21use constant ON_VMS => $^O eq 'VMS' ? 1 : 0;
22
520c99e2
JB
23### If these are changed, update @TYPES and the new() POD
24use constant TGZ => 'tgz';
25use constant TAR => 'tar';
26use constant GZ => 'gz';
27use constant ZIP => 'zip';
28use constant BZ2 => 'bz2';
29use constant TBZ => 'tbz';
1dae2fb5 30use constant Z => 'Z';
520c99e2
JB
31
32use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG];
33
9e5a0ef9 34$VERSION = '0.24';
520c99e2
JB
35$PREFER_BIN = 0;
36$WARN = 1;
37$DEBUG = 0;
1dae2fb5 38my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z ); # same as all constants
520c99e2
JB
39
40local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
41
42=pod
43
44=head1 NAME
45
46Archive::Extract - A generic archive extracting mechanism
47
48=head1 SYNOPSIS
49
50 use Archive::Extract;
51
52 ### build an Archive::Extract object ###
53 my $ae = Archive::Extract->new( archive => 'foo.tgz' );
54
55 ### extract to cwd() ###
56 my $ok = $ae->extract;
57
58 ### extract to /tmp ###
59 my $ok = $ae->extract( to => '/tmp' );
60
61 ### what if something went wrong?
62 my $ok = $ae->extract or die $ae->error;
63
64 ### files from the archive ###
65 my $files = $ae->files;
66
67 ### dir that was extracted to ###
68 my $outdir = $ae->extract_path;
69
70
71 ### quick check methods ###
72 $ae->is_tar # is it a .tar file?
73 $ae->is_tgz # is it a .tar.gz or .tgz file?
74 $ae->is_gz; # is it a .gz file?
75 $ae->is_zip; # is it a .zip file?
76 $ae->is_bz2; # is it a .bz2 file?
77 $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
78
79 ### absolute path to the archive you provided ###
80 $ae->archive;
81
82 ### commandline tools, if found ###
83 $ae->bin_tar # path to /bin/tar, if found
84 $ae->bin_gzip # path to /bin/gzip, if found
85 $ae->bin_unzip # path to /bin/unzip, if found
86 $ae->bin_bunzip2 # path to /bin/bunzip2 if found
87
88=head1 DESCRIPTION
89
90Archive::Extract is a generic archive extraction mechanism.
91
92It allows you to extract any archive file of the type .tar, .tar.gz,
1dae2fb5
RGS
93.gz, .Z, tar.bz2, .tbz, .bz2 or .zip without having to worry how it
94does so, or use different interfaces for each type by using either
95perl modules, or commandline tools on your system.
520c99e2
JB
96
97See the C<HOW IT WORKS> section further down for details.
98
99=cut
100
101
102### see what /bin/programs are available ###
103$PROGRAMS = {};
1dae2fb5 104for my $pgm (qw[tar unzip gzip bunzip2 uncompress]) {
520c99e2
JB
105 $PROGRAMS->{$pgm} = can_run($pgm);
106}
107
108### mapping from types to extractor methods ###
109my $Mapping = {
110 is_tgz => '_untar',
111 is_tar => '_untar',
112 is_gz => '_gunzip',
113 is_zip => '_unzip',
114 is_tbz => '_untar',
115 is_bz2 => '_bunzip2',
1dae2fb5 116 is_Z => '_uncompress',
520c99e2
JB
117};
118
119{
120 my $tmpl = {
121 archive => { required => 1, allow => FILE_EXISTS },
122 type => { default => '', allow => [ @Types ] },
123 };
124
125 ### build accesssors ###
126 for my $method( keys %$tmpl,
127 qw[_extractor _gunzip_to files extract_path],
128 qw[_error_msg _error_msg_long]
129 ) {
130 no strict 'refs';
131 *$method = sub {
132 my $self = shift;
133 $self->{$method} = $_[0] if @_;
134 return $self->{$method};
135 }
136 }
137
138=head1 METHODS
139
140=head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
141
142Creates a new C<Archive::Extract> object based on the archive file you
143passed it. Automatically determines the type of archive based on the
144extension, but you can override that by explicitly providing the
145C<type> argument.
146
147Valid values for C<type> are:
148
149=over 4
150
151=item tar
152
153Standard tar files, as produced by, for example, C</bin/tar>.
154Corresponds to a C<.tar> suffix.
155
156=item tgz
157
158Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
159Corresponds to a C<.tgz> or C<.tar.gz> suffix.
160
161=item gz
162
163Gzip compressed file, as produced by, for example C</bin/gzip>.
164Corresponds to a C<.gz> suffix.
165
1dae2fb5
RGS
166=item Z
167
168Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
169Corresponds to a C<.Z> suffix.
170
520c99e2
JB
171=item zip
172
173Zip compressed file, as produced by, for example C</bin/zip>.
174Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
175
176=item bz2
177
178Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
179Corresponds to a C<.bz2> suffix.
180
181=item tbz
182
183Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
184Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
185
186=back
187
188Returns a C<Archive::Extract> object on success, or false on failure.
189
190=cut
191
192 ### constructor ###
193 sub new {
194 my $class = shift;
195 my %hash = @_;
196
197 my $parsed = check( $tmpl, \%hash ) or return;
198
199 ### make sure we have an absolute path ###
200 my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
201
202 ### figure out the type, if it wasn't already specified ###
203 unless ( $parsed->{type} ) {
204 $parsed->{type} =
574b415d 205 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ :
520c99e2
JB
206 $ar =~ /.+?\.gz$/i ? GZ :
207 $ar =~ /.+?\.tar$/i ? TAR :
208 $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP :
9e5a0ef9 209 $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ :
520c99e2 210 $ar =~ /.+?\.bz2$/i ? BZ2 :
1dae2fb5 211 $ar =~ /.+?\.Z$/ ? Z :
520c99e2
JB
212 '';
213
214 }
215
216 ### don't know what type of file it is ###
217 return __PACKAGE__->_error(loc("Cannot determine file type for '%1'",
218 $parsed->{archive} )) unless $parsed->{type};
219
220 return bless $parsed, $class;
221 }
222}
223
224=head2 $ae->extract( [to => '/output/path'] )
225
226Extracts the archive represented by the C<Archive::Extract> object to
227the path of your choice as specified by the C<to> argument. Defaults to
228C<cwd()>.
229
230Since C<.gz> files never hold a directory, but only a single file; if
231the C<to> argument is an existing directory, the file is extracted
232there, with it's C<.gz> suffix stripped.
233If the C<to> argument is not an existing directory, the C<to> argument
234is understood to be a filename, if the archive type is C<gz>.
235In the case that you did not specify a C<to> argument, the output
236file will be the name of the archive file, stripped from it's C<.gz>
237suffix, in the current working directory.
238
239C<extract> will try a pure perl solution first, and then fall back to
240commandline tools if they are available. See the C<GLOBAL VARIABLES>
241section below on how to alter this behaviour.
242
243It will return true on success, and false on failure.
244
245On success, it will also set the follow attributes in the object:
246
247=over 4
248
249=item $ae->extract_path
250
251This is the directory that the files where extracted to.
252
253=item $ae->files
254
255This is an array ref with the paths of all the files in the archive,
256relative to the C<to> argument you specified.
257To get the full path to an extracted file, you would use:
258
259 File::Spec->catfile( $to, $ae->files->[0] );
260
261Note that all files from a tar archive will be in unix format, as per
262the tar specification.
263
264=back
265
266=cut
267
268sub extract {
269 my $self = shift;
270 my %hash = @_;
271
272 my $to;
273 my $tmpl = {
274 to => { default => '.', store => \$to }
275 };
276
277 check( $tmpl, \%hash ) or return;
278
279 ### so 'to' could be a file or a dir, depending on whether it's a .gz
280 ### file, or basically anything else.
281 ### so, check that, then act accordingly.
282 ### set an accessor specifically so _gunzip can know what file to extract
283 ### to.
284 my $dir;
285 { ### a foo.gz file
1dae2fb5 286 if( $self->is_gz or $self->is_bz2 or $self->is_Z) {
520c99e2 287
9e5a0ef9 288 my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z)$//i;
520c99e2
JB
289
290 ### to is a dir?
291 if ( -d $to ) {
292 $dir = $to;
293 $self->_gunzip_to( basename($cp) );
294
295 ### then it's a filename
296 } else {
297 $dir = dirname($to);
298 $self->_gunzip_to( basename($to) );
299 }
300
301 ### not a foo.gz file
302 } else {
303 $dir = $to;
304 }
305 }
306
307 ### make the dir if it doesn't exist ###
308 unless( -d $dir ) {
309 eval { mkpath( $dir ) };
310
311 return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
312 if $@;
313 }
314
315 ### get the current dir, to restore later ###
316 my $cwd = cwd();
317
318 my $ok = 1;
319 EXTRACT: {
320
321 ### chdir to the target dir ###
322 unless( chdir $dir ) {
323 $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
324 $ok = 0; last EXTRACT;
325 }
326
327 ### set files to an empty array ref, so there's always an array
328 ### ref IN the accessor, to avoid errors like:
329 ### Can't use an undefined value as an ARRAY reference at
330 ### ../lib/Archive/Extract.pm line 742. (rt #19815)
331 $self->files( [] );
332
333 ### find what extractor method to use ###
334 while( my($type,$method) = each %$Mapping ) {
335
336 ### call the corresponding method if the type is OK ###
337 if( $self->$type) {
338 $ok = $self->$method();
339 }
340 }
341
342 ### warn something went wrong if we didn't get an OK ###
343 $self->_error(loc("Extract failed, no extractor found"))
344 unless $ok;
345
346 }
347
348 ### and chdir back ###
349 unless( chdir $cwd ) {
350 $self->_error(loc("Could not chdir back to start dir '%1': %2'",
351 $cwd, $!));
352 }
353
354 return $ok;
355}
356
357=pod
358
359=head1 ACCESSORS
360
361=head2 $ae->error([BOOL])
362
363Returns the last encountered error as string.
364Pass it a true value to get the C<Carp::longmess()> output instead.
365
366=head2 $ae->extract_path
367
368This is the directory the archive got extracted to.
369See C<extract()> for details.
370
371=head2 $ae->files
372
373This is an array ref holding all the paths from the archive.
374See C<extract()> for details.
375
376=head2 $ae->archive
377
378This is the full path to the archive file represented by this
379C<Archive::Extract> object.
380
381=head2 $ae->type
382
383This is the type of archive represented by this C<Archive::Extract>
384object. See accessors below for an easier way to use this.
385See the C<new()> method for details.
386
387=head2 $ae->types
388
389Returns a list of all known C<types> for C<Archive::Extract>'s
390C<new> method.
391
392=cut
393
394sub types { return @Types }
395
396=head2 $ae->is_tgz
397
398Returns true if the file is of type C<.tar.gz>.
399See the C<new()> method for details.
400
401=head2 $ae->is_tar
402
403Returns true if the file is of type C<.tar>.
404See the C<new()> method for details.
405
406=head2 $ae->is_gz
407
408Returns true if the file is of type C<.gz>.
409See the C<new()> method for details.
410
1dae2fb5
RGS
411=head2 $ae->is_Z
412
413Returns true if the file is of type C<.Z>.
414See the C<new()> method for details.
415
520c99e2
JB
416=head2 $ae->is_zip
417
418Returns true if the file is of type C<.zip>.
419See the C<new()> method for details.
420
421=cut
422
423### quick check methods ###
424sub is_tgz { return $_[0]->type eq TGZ }
425sub is_tar { return $_[0]->type eq TAR }
426sub is_gz { return $_[0]->type eq GZ }
427sub is_zip { return $_[0]->type eq ZIP }
428sub is_tbz { return $_[0]->type eq TBZ }
429sub is_bz2 { return $_[0]->type eq BZ2 }
1dae2fb5 430sub is_Z { return $_[0]->type eq Z }
520c99e2
JB
431
432=pod
433
434=head2 $ae->bin_tar
435
436Returns the full path to your tar binary, if found.
437
438=head2 $ae->bin_gzip
439
440Returns the full path to your gzip binary, if found
441
442=head2 $ae->bin_unzip
443
444Returns the full path to your unzip binary, if found
445
446=cut
447
448### paths to commandline tools ###
1dae2fb5
RGS
449sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} }
450sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} }
451sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} }
452sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
453sub bin_uncompress { return $PROGRAMS->{'uncompress'}
454 if $PROGRAMS->{'uncompress'} }
9e5a0ef9
JB
455=head2 $bool = $ae->have_old_bunzip2
456
457Older versions of C</bin/bunzip2>, from before the C<bunzip2 1.0> release,
458require all archive names to end in C<.bz2> or it will not extract
459them. This method checks if you have a recent version of C<bunzip2>
460that allows any extension, or an older one that doesn't.
461
462=cut
463
464sub have_old_bunzip2 {
465 my $self = shift;
466
467 ### no bunzip2? no old bunzip2 either :)
468 return unless $self->bin_bunzip2;
469
470 ### if we can't run this, we can't be sure if it's too old or not
471 ### XXX stupid stupid stupid bunzip2 doesn't understand --version
472 ### is not a request to extract data:
473 ### $ bunzip2 --version
474 ### bzip2, a block-sorting file compressor. Version 1.0.2, 30-Dec-2001.
475 ### [...]
476 ### bunzip2: I won't read compressed data from a terminal.
477 ### bunzip2: For help, type: `bunzip2 --help'.
478 ### $ echo $?
479 ### 1
480 ### HATEFUL!
481 my $buffer;
482 scalar run( command => [$self->bin_bunzip2, '--version'],
483 verbose => 0,
484 buffer => \$buffer
485 );
486
487 ### no output
488 return unless $buffer;
489
490 my ($version) = $buffer =~ /version \s+ (\d+)/ix;
491
492 return 1 if $version < 1;
493 return;
494}
520c99e2
JB
495
496#################################
497#
498# Untar code
499#
500#################################
501
502
503### untar wrapper... goes to either Archive::Tar or /bin/tar
504### depending on $PREFER_BIN
505sub _untar {
506 my $self = shift;
507
508 ### bzip2 support in A::T via IO::Uncompress::Bzip2
509 my @methods = qw[_untar_at _untar_bin];
574b415d 510 @methods = reverse @methods if $PREFER_BIN;
520c99e2
JB
511
512 for my $method (@methods) {
513 $self->_extractor($method) && return 1 if $self->$method();
514 }
515
516 return $self->_error(loc("Unable to untar file '%1'", $self->archive));
517}
518
519### use /bin/tar to extract ###
520sub _untar_bin {
521 my $self = shift;
522
523 ### check for /bin/tar ###
524 return $self->_error(loc("No '%1' program found", '/bin/tar'))
525 unless $self->bin_tar;
526
527 ### check for /bin/gzip if we need it ###
528 return $self->_error(loc("No '%1' program found", '/bin/gzip'))
529 if $self->is_tgz && !$self->bin_gzip;
530
531 return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
532 if $self->is_tbz && !$self->bin_bunzip2;
533
534 ### XXX figure out how to make IPC::Run do this in one call --
535 ### currently i don't know how to get output of a command after a pipe
536 ### trapped in a scalar. Mailed barries about this 5th of june 2004.
537
538
539
540 ### see what command we should run, based on whether
541 ### it's a .tgz or .tar
542
543 ### XXX solaris tar and bsdtar are having different outputs
544 ### depending whether you run with -x or -t
545 ### compensate for this insanity by running -t first, then -x
546 { my $cmd =
547 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
548 $self->bin_tar, '-tf', '-'] :
9e5a0ef9 549 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
520c99e2
JB
550 $self->bin_tar, '-tf', '-'] :
551 [$self->bin_tar, '-tf', $self->archive];
552
553 ### run the command ###
554 my $buffer = '';
555 unless( scalar run( command => $cmd,
556 buffer => \$buffer,
557 verbose => $DEBUG )
558 ) {
559 return $self->_error(loc(
560 "Error listing contents of archive '%1': %2",
561 $self->archive, $buffer ));
562 }
563
564 ### no buffers available?
565 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
566 $self->_error( $self->_no_buffer_files( $self->archive ) );
567
568 } else {
569 ### if we're on solaris we /might/ be using /bin/tar, which has
570 ### a weird output format... we might also be using
571 ### /usr/local/bin/tar, which is gnu tar, which is perfectly
572 ### fine... so we have to do some guessing here =/
573 my @files = map { chomp;
574 !ON_SOLARIS ? $_
575 : (m|^ x \s+ # 'xtract' -- sigh
576 (.+?), # the actual file name
577 \s+ [\d,.]+ \s bytes,
578 \s+ [\d,.]+ \s tape \s blocks
579 |x ? $1 : $_);
580
581 } split $/, $buffer;
582
583 ### store the files that are in the archive ###
584 $self->files(\@files);
585 }
586 }
587
588 ### now actually extract it ###
589 { my $cmd =
590 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
591 $self->bin_tar, '-xf', '-'] :
9e5a0ef9 592 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
520c99e2
JB
593 $self->bin_tar, '-xf', '-'] :
594 [$self->bin_tar, '-xf', $self->archive];
595
596 my $buffer = '';
597 unless( scalar run( command => $cmd,
598 buffer => \$buffer,
599 verbose => $DEBUG )
600 ) {
601 return $self->_error(loc("Error extracting archive '%1': %2",
602 $self->archive, $buffer ));
603 }
604
605 ### we might not have them, due to lack of buffers
606 if( $self->files ) {
607 ### now that we've extracted, figure out where we extracted to
608 my $dir = $self->__get_extract_dir( $self->files );
609
610 ### store the extraction dir ###
611 $self->extract_path( $dir );
612 }
613 }
614
615 ### we got here, no error happened
616 return 1;
617}
618
619### use archive::tar to extract ###
620sub _untar_at {
621 my $self = shift;
622
623 ### we definitely need A::T, so load that first
624 { my $use_list = { 'Archive::Tar' => '0.0' };
625
626 unless( can_load( modules => $use_list ) ) {
627
628 return $self->_error(loc("You do not have '%1' installed - " .
629 "Please install it as soon as possible.",
630 'Archive::Tar'));
631 }
632 }
633
634 ### we might pass it a filehandle if it's a .tbz file..
635 my $fh_to_read = $self->archive;
636
637 ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
638 ### if A::T's version is 0.99 or higher
639 if( $self->is_tgz ) {
640 my $use_list = { 'Compress::Zlib' => '0.0' };
641 $use_list->{ 'IO::Zlib' } = '0.0'
642 if $Archive::Tar::VERSION >= '0.99';
643
644 unless( can_load( modules => $use_list ) ) {
645 my $which = join '/', sort keys %$use_list;
646
647 return $self->_error(loc(
648 "You do not have '%1' installed - Please ".
649 "install it as soon as possible.", $which));
650
651 }
652 } elsif ( $self->is_tbz ) {
653 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
654 unless( can_load( modules => $use_list ) ) {
655 return $self->_error(loc(
656 "You do not have '%1' installed - Please " .
657 "install it as soon as possible.",
658 'IO::Uncompress::Bunzip2'));
659 }
660
661 my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
662 return $self->_error(loc("Unable to open '%1': %2",
663 $self->archive,
664 $IO::Uncompress::Bunzip2::Bunzip2Error));
665
666 $fh_to_read = $bz;
667 }
668
669 my $tar = Archive::Tar->new();
670
671 ### only tell it it's compressed if it's a .tgz, as we give it a file
672 ### handle if it's a .tbz
673 unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) {
674 return $self->_error(loc("Unable to read '%1': %2", $self->archive,
675 $Archive::Tar::error));
676 }
677
678 ### workaround to prevent Archive::Tar from setting uid, which
679 ### is a potential security hole. -autrijus
680 ### have to do it here, since A::T needs to be /loaded/ first ###
681 { no strict 'refs'; local $^W;
682
683 ### older versions of archive::tar <= 0.23
684 *Archive::Tar::chown = sub {};
685 }
686
687 ### for version of archive::tar > 1.04
688 local $Archive::Tar::Constant::CHOWN = 0;
689
690 { local $^W; # quell 'splice() offset past end of array' warnings
691 # on older versions of A::T
692
693 ### older archive::tar always returns $self, return value slightly
694 ### fux0r3d because of it.
695 $tar->extract()
696 or return $self->_error(loc("Unable to extract '%1': %2",
697 $self->archive, $Archive::Tar::error ));
698 }
699
700 my @files = $tar->list_files;
701 my $dir = $self->__get_extract_dir( \@files );
702
703 ### store the files that are in the archive ###
704 $self->files(\@files);
705
706 ### store the extraction dir ###
707 $self->extract_path( $dir );
708
709 ### check if the dir actually appeared ###
710 return 1 if -d $self->extract_path;
711
712 ### no dir, we failed ###
713 return $self->_error(loc("Unable to extract '%1': %2",
714 $self->archive, $Archive::Tar::error ));
715}
716
717#################################
718#
719# Gunzip code
720#
721#################################
722
723### gunzip wrapper... goes to either Compress::Zlib or /bin/gzip
724### depending on $PREFER_BIN
725sub _gunzip {
726 my $self = shift;
727
728 my @methods = qw[_gunzip_cz _gunzip_bin];
729 @methods = reverse @methods if $PREFER_BIN;
730
731 for my $method (@methods) {
732 $self->_extractor($method) && return 1 if $self->$method();
733 }
734
735 return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
736}
737
738sub _gunzip_bin {
739 my $self = shift;
740
741 ### check for /bin/gzip -- we need it ###
742 return $self->_error(loc("No '%1' program found", '/bin/gzip'))
743 unless $self->bin_gzip;
744
745
746 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
747 return $self->_error(loc("Could not open '%1' for writing: %2",
748 $self->_gunzip_to, $! ));
749
750 my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
751
752 my $buffer;
753 unless( scalar run( command => $cmd,
754 verbose => $DEBUG,
755 buffer => \$buffer )
756 ) {
757 return $self->_error(loc("Unable to gunzip '%1': %2",
758 $self->archive, $buffer));
759 }
760
761 ### no buffers available?
762 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
763 $self->_error( $self->_no_buffer_content( $self->archive ) );
764 }
765
766 print $fh $buffer if defined $buffer;
767
768 close $fh;
769
770 ### set what files where extract, and where they went ###
771 $self->files( [$self->_gunzip_to] );
772 $self->extract_path( File::Spec->rel2abs(cwd()) );
773
774 return 1;
775}
776
777sub _gunzip_cz {
778 my $self = shift;
779
780 my $use_list = { 'Compress::Zlib' => '0.0' };
781 unless( can_load( modules => $use_list ) ) {
782 return $self->_error(loc("You do not have '%1' installed - Please " .
783 "install it as soon as possible.", 'Compress::Zlib'));
784 }
785
786 my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
787 return $self->_error(loc("Unable to open '%1': %2",
788 $self->archive, $Compress::Zlib::gzerrno));
789
790 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
791 return $self->_error(loc("Could not open '%1' for writing: %2",
792 $self->_gunzip_to, $! ));
793
794 my $buffer;
795 $fh->print($buffer) while $gz->gzread($buffer) > 0;
796 $fh->close;
797
798 ### set what files where extract, and where they went ###
799 $self->files( [$self->_gunzip_to] );
800 $self->extract_path( File::Spec->rel2abs(cwd()) );
801
802 return 1;
803}
804
805#################################
806#
1dae2fb5
RGS
807# Uncompress code
808#
809#################################
810
811
812### untar wrapper... goes to either Archive::Tar or /bin/tar
813### depending on $PREFER_BIN
814sub _uncompress {
815 my $self = shift;
816
817 my @methods = qw[_gunzip_cz _uncompress_bin];
818 @methods = reverse @methods if $PREFER_BIN;
819
820 for my $method (@methods) {
821 $self->_extractor($method) && return 1 if $self->$method();
822 }
823
824 return $self->_error(loc("Unable to untar file '%1'", $self->archive));
825}
826
827sub _uncompress_bin {
828 my $self = shift;
829
830 ### check for /bin/gzip -- we need it ###
831 return $self->_error(loc("No '%1' program found", '/bin/uncompress'))
832 unless $self->bin_uncompress;
833
834
835 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
836 return $self->_error(loc("Could not open '%1' for writing: %2",
837 $self->_gunzip_to, $! ));
838
839 my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
840
841 my $buffer;
842 unless( scalar run( command => $cmd,
843 verbose => $DEBUG,
844 buffer => \$buffer )
845 ) {
846 return $self->_error(loc("Unable to uncompress '%1': %2",
847 $self->archive, $buffer));
848 }
849
850 ### no buffers available?
851 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
852 $self->_error( $self->_no_buffer_content( $self->archive ) );
853 }
854
855 print $fh $buffer if defined $buffer;
856
857 close $fh;
858
859 ### set what files where extract, and where they went ###
860 $self->files( [$self->_gunzip_to] );
861 $self->extract_path( File::Spec->rel2abs(cwd()) );
862
863 return 1;
864}
865
866
867#################################
868#
520c99e2
JB
869# Unzip code
870#
871#################################
872
873### unzip wrapper... goes to either Archive::Zip or /bin/unzip
874### depending on $PREFER_BIN
875sub _unzip {
876 my $self = shift;
877
878 my @methods = qw[_unzip_az _unzip_bin];
879 @methods = reverse @methods if $PREFER_BIN;
880
881 for my $method (@methods) {
882 $self->_extractor($method) && return 1 if $self->$method();
883 }
884
885 return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
886}
887
888sub _unzip_bin {
889 my $self = shift;
890
891 ### check for /bin/gzip if we need it ###
892 return $self->_error(loc("No '%1' program found", '/bin/unzip'))
893 unless $self->bin_unzip;
894
895
896 ### first, get the files.. it must be 2 different commands with 'unzip' :(
9e5a0ef9
JB
897 { ### on VMS, capital letter options have to be quoted. This is
898 ### peported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
899 ### Subject: [patch@31735]Archive Extract fix on VMS.
900 my $opt = ON_VMS ? '"-Z"' : '-Z';
901 my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
902
520c99e2
JB
903 my $buffer = '';
904 unless( scalar run( command => $cmd,
905 verbose => $DEBUG,
906 buffer => \$buffer )
907 ) {
908 return $self->_error(loc("Unable to unzip '%1': %2",
909 $self->archive, $buffer));
910 }
911
912 ### no buffers available?
913 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
914 $self->_error( $self->_no_buffer_files( $self->archive ) );
915
916 } else {
917 $self->files( [split $/, $buffer] );
918 }
919 }
920
921 ### now, extract the archive ###
922 { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
923
924 my $buffer;
925 unless( scalar run( command => $cmd,
926 verbose => $DEBUG,
927 buffer => \$buffer )
928 ) {
929 return $self->_error(loc("Unable to unzip '%1': %2",
930 $self->archive, $buffer));
931 }
932
933 if( scalar @{$self->files} ) {
934 my $files = $self->files;
935 my $dir = $self->__get_extract_dir( $files );
936
937 $self->extract_path( $dir );
938 }
939 }
940
941 return 1;
942}
943
944sub _unzip_az {
945 my $self = shift;
946
947 my $use_list = { 'Archive::Zip' => '0.0' };
948 unless( can_load( modules => $use_list ) ) {
949 return $self->_error(loc("You do not have '%1' installed - Please " .
950 "install it as soon as possible.", 'Archive::Zip'));
951 }
952
953 my $zip = Archive::Zip->new();
954
955 unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
956 return $self->_error(loc("Unable to read '%1'", $self->archive));
957 }
958
959 my @files;
960 ### have to extract every memeber individually ###
961 for my $member ($zip->members) {
962 push @files, $member->{fileName};
963
964 unless( $zip->extractMember($member) == &Archive::Zip::AZ_OK ) {
965 return $self->_error(loc("Extraction of '%1' from '%2' failed",
966 $member->{fileName}, $self->archive ));
967 }
968 }
969
970 my $dir = $self->__get_extract_dir( \@files );
971
972 ### set what files where extract, and where they went ###
973 $self->files( \@files );
974 $self->extract_path( File::Spec->rel2abs($dir) );
975
976 return 1;
977}
978
979sub __get_extract_dir {
980 my $self = shift;
981 my $files = shift || [];
982
983 return unless scalar @$files;
984
985 my($dir1, $dir2);
986 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
987 my($dir,$pos) = @$aref;
988
989 ### add a catdir(), so that any trailing slashes get
990 ### take care of (removed)
991 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
992 ### which was the problem in bug #23999
993 my $res = -d $files->[$pos]
994 ? File::Spec->catdir( $files->[$pos], '' )
995 : File::Spec->catdir( dirname( $files->[$pos] ) );
996
997 $$dir = $res;
998 }
999
1000 ### if the first and last dir don't match, make sure the
1001 ### dirname is not set wrongly
1002 my $dir;
1003
1004 ### dirs are the same, so we know for sure what the extract dir is
1005 if( $dir1 eq $dir2 ) {
1006 $dir = $dir1;
1007
1008 ### dirs are different.. do they share the base dir?
1009 ### if so, use that, if not, fall back to '.'
1010 } else {
1011 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1012 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
1013
1014 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1015 }
1016
1017 return File::Spec->rel2abs( $dir );
1018}
1019
1020#################################
1021#
1022# Bunzip2 code
1023#
1024#################################
1025
1026### bunzip2 wrapper...
1027sub _bunzip2 {
1028 my $self = shift;
1029
1030 my @methods = qw[_bunzip2_cz2 _bunzip2_bin];
1031 @methods = reverse @methods if $PREFER_BIN;
1032
1033 for my $method (@methods) {
1034 $self->_extractor($method) && return 1 if $self->$method();
1035 }
1036
1037 return $self->_error(loc("Unable to bunzip2 file '%1'", $self->archive));
1038}
1039
1040sub _bunzip2_bin {
1041 my $self = shift;
1042
1043 ### check for /bin/gzip -- we need it ###
1044 return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
1045 unless $self->bin_bunzip2;
1046
1047
1048 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1049 return $self->_error(loc("Could not open '%1' for writing: %2",
1050 $self->_gunzip_to, $! ));
9e5a0ef9
JB
1051
1052 ### guard against broken bunzip2. See ->have_old_bunzip2()
1053 ### for details
1054 if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1055 return $self->_error(loc("Your bunzip2 version is too old and ".
1056 "can only extract files ending in '%1'",
1057 '.bz2'));
1058 }
520c99e2 1059
9e5a0ef9 1060 my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
520c99e2
JB
1061
1062 my $buffer;
1063 unless( scalar run( command => $cmd,
1064 verbose => $DEBUG,
1065 buffer => \$buffer )
1066 ) {
1067 return $self->_error(loc("Unable to bunzip2 '%1': %2",
1068 $self->archive, $buffer));
1069 }
1070
1071 ### no buffers available?
1072 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1073 $self->_error( $self->_no_buffer_content( $self->archive ) );
1074 }
1075
1076 print $fh $buffer if defined $buffer;
1077
1078 close $fh;
1079
1080 ### set what files where extract, and where they went ###
1081 $self->files( [$self->_gunzip_to] );
1082 $self->extract_path( File::Spec->rel2abs(cwd()) );
1083
1084 return 1;
1085}
1086
1087### using cz2, the compact versions... this we use mainly in archive::tar
1088### extractor..
1089# sub _bunzip2_cz1 {
1090# my $self = shift;
1091#
1092# my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1093# unless( can_load( modules => $use_list ) ) {
1094# return $self->_error(loc("You do not have '%1' installed - Please " .
1095# "install it as soon as possible.",
1096# 'IO::Uncompress::Bunzip2'));
1097# }
1098#
1099# my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1100# return $self->_error(loc("Unable to open '%1': %2",
1101# $self->archive,
1102# $IO::Uncompress::Bunzip2::Bunzip2Error));
1103#
1104# my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1105# return $self->_error(loc("Could not open '%1' for writing: %2",
1106# $self->_gunzip_to, $! ));
1107#
1108# my $buffer;
1109# $fh->print($buffer) while $bz->read($buffer) > 0;
1110# $fh->close;
1111#
1112# ### set what files where extract, and where they went ###
1113# $self->files( [$self->_gunzip_to] );
1114# $self->extract_path( File::Spec->rel2abs(cwd()) );
1115#
1116# return 1;
1117# }
1118
1119sub _bunzip2_cz2 {
1120 my $self = shift;
1121
1122 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1123 unless( can_load( modules => $use_list ) ) {
1124 return $self->_error(loc("You do not have '%1' installed - Please " .
1125 "install it as soon as possible.",
1126 'IO::Uncompress::Bunzip2'));
1127 }
1128
1129 IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1130 or return $self->_error(loc("Unable to uncompress '%1': %2",
1131 $self->archive,
1132 $IO::Uncompress::Bunzip2::Bunzip2Error));
1133
1134 ### set what files where extract, and where they went ###
1135 $self->files( [$self->_gunzip_to] );
1136 $self->extract_path( File::Spec->rel2abs(cwd()) );
1137
1138 return 1;
1139}
1140
1141
1142#################################
1143#
1144# Error code
1145#
1146#################################
1147
1148sub _error {
1149 my $self = shift;
1150 my $error = shift;
1151
1152 $self->_error_msg( $error );
1153 $self->_error_msg_long( Carp::longmess($error) );
1154
1155 ### set $Archive::Extract::WARN to 0 to disable printing
1156 ### of errors
1157 if( $WARN ) {
1158 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1159 }
1160
1161 return;
1162}
1163
1164sub error {
1165 my $self = shift;
1166 return shift() ? $self->_error_msg_long : $self->_error_msg;
1167}
1168
1169sub _no_buffer_files {
1170 my $self = shift;
1171 my $file = shift or return;
1172 return loc("No buffer captured, unable to tell ".
1173 "extracted files or extraction dir for '%1'", $file);
1174}
1175
1176sub _no_buffer_content {
1177 my $self = shift;
1178 my $file = shift or return;
1179 return loc("No buffer captured, unable to get content for '%1'", $file);
1180}
11811;
1182
1183=pod
1184
1185=head1 HOW IT WORKS
1186
1187C<Archive::Extract> tries first to determine what type of archive you
1188are passing it, by inspecting its suffix. It does not do this by using
1189Mime magic, or something related. See C<CAVEATS> below.
1190
1191Once it has determined the file type, it knows which extraction methods
1192it can use on the archive. It will try a perl solution first, then fall
1193back to a commandline tool if that fails. If that also fails, it will
1194return false, indicating it was unable to extract the archive.
1195See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1196
1197=head1 CAVEATS
1198
1199=head2 File Extensions
1200
1201C<Archive::Extract> trusts on the extension of the archive to determine
1202what type it is, and what extractor methods therefore can be used. If
1203your archives do not have any of the extensions as described in the
1204C<new()> method, you will have to specify the type explicitly, or
1205C<Archive::Extract> will not be able to extract the archive for you.
1206
9e5a0ef9
JB
1207=head2 Supporting Very Large Files
1208
1209C<Archive::Extract> can use either pure perl modules or command line
1210programs under the hood. Some of the pure perl modules (like
1211C<Archive::Tar> take the entire contents of the archive into memory,
1212which may not be feasible on your system. Consider setting the global
1213variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1214the use of command line programs and won't consume so much memory.
1215
1216See the C<GLOBAL VARIABLES> section below for details.
1217
1218=head2 Bunzip2 support of arbitrary extensions.
1219
1220Older versions of C</bin/bunzip2> do not support arbitrary file
1221extensions and insist on a C<.bz2> suffix. Although we do our best
1222to guard against this, if you experience a bunzip2 error, it may
1223be related to this. For details, please see the C<have_old_bunzip2>
1224method.
1225
520c99e2
JB
1226=head1 GLOBAL VARIABLES
1227
1228=head2 $Archive::Extract::DEBUG
1229
1230Set this variable to C<true> to have all calls to command line tools
1231be printed out, including all their output.
1232This also enables C<Carp::longmess> errors, instead of the regular
1233C<carp> errors.
1234
1235Good for tracking down why things don't work with your particular
1236setup.
1237
1238Defaults to C<false>.
1239
1240=head2 $Archive::Extract::WARN
1241
1242This variable controls whether errors encountered internally by
1243C<Archive::Extract> should be C<carp>'d or not.
1244
1245Set to false to silence warnings. Inspect the output of the C<error()>
1246method manually to see what went wrong.
1247
1248Defaults to C<true>.
1249
1250=head2 $Archive::Extract::PREFER_BIN
1251
1252This variables controls whether C<Archive::Extract> should prefer the
1253use of perl modules, or commandline tools to extract archives.
1254
1255Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1256
1257Defaults to C<false>.
1258
1259=head1 TODO
1260
1261=over 4
1262
1263=item Mime magic support
1264
1265Maybe this module should use something like C<File::Type> to determine
1266the type, rather than blindly trust the suffix.
1267
1dae2fb5
RGS
1268=back
1269
574b415d 1270=head1 BUG REPORTS
520c99e2 1271
574b415d 1272Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>.
520c99e2 1273
574b415d
RGS
1274=head1 AUTHOR
1275
1276This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
520c99e2 1277
574b415d 1278=head1 COPYRIGHT
520c99e2 1279
574b415d
RGS
1280This library is free software; you may redistribute and/or modify it
1281under the same terms as Perl itself.
520c99e2
JB
1282
1283=cut
1284
1285# Local variables:
1286# c-indentation-style: bsd
1287# c-basic-offset: 4
1288# indent-tabs-mode: nil
1289# End:
1290# vim: expandtab shiftwidth=4:
1291