1 package Archive::Extract;
2 use if $] > 5.017, 'deprecate';
8 use IPC::Cmd qw[run can_run];
10 use File::Path qw[mkpath];
12 use File::Basename qw[dirname basename];
13 use Params::Check qw[check];
14 use Module::Load::Conditional qw[can_load check_install];
15 use Locale::Maketext::Simple Style => 'gettext';
17 ### solaris has silly /bin/tar output ###
18 use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
19 use constant ON_NETBSD => $^O eq 'netbsd' ? 1 : 0;
20 use constant ON_OPENBSD => $^O eq 'openbsd' ? 1 : 0;
21 use constant ON_FREEBSD => $^O eq 'freebsd' ? 1 : 0;
22 use constant ON_LINUX => $^O eq 'linux' ? 1 : 0;
23 use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
25 ### VMS may require quoting upper case command options
26 use constant ON_VMS => $^O eq 'VMS' ? 1 : 0;
28 ### Windows needs special treatment of Tar options
29 use constant ON_WIN32 => $^O eq 'MSWin32' ? 1 : 0;
31 ### we can't use this extraction method, because of missing
33 use constant METHOD_NA => [];
35 ### If these are changed, update @TYPES and the new() POD
36 use constant TGZ => 'tgz';
37 use constant TAR => 'tar';
38 use constant GZ => 'gz';
39 use constant ZIP => 'zip';
40 use constant BZ2 => 'bz2';
41 use constant TBZ => 'tbz';
42 use constant Z => 'Z';
43 use constant LZMA => 'lzma';
44 use constant XZ => 'xz';
45 use constant TXZ => 'txz';
47 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG
48 $_ALLOW_BIN $_ALLOW_PURE_PERL $_ALLOW_TAR_ITER
55 $_ALLOW_PURE_PERL = 1; # allow pure perl extractors
56 $_ALLOW_BIN = 1; # allow binary extractors
57 $_ALLOW_TAR_ITER = 1; # try to use Archive::Tar->iter if available
59 # same as all constants
60 my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA, XZ, TXZ );
62 local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
68 Archive::Extract - A generic archive extracting mechanism
74 ### build an Archive::Extract object ###
75 my $ae = Archive::Extract->new( archive => 'foo.tgz' );
77 ### extract to cwd() ###
78 my $ok = $ae->extract;
80 ### extract to /tmp ###
81 my $ok = $ae->extract( to => '/tmp' );
83 ### what if something went wrong?
84 my $ok = $ae->extract or die $ae->error;
86 ### files from the archive ###
87 my $files = $ae->files;
89 ### dir that was extracted to ###
90 my $outdir = $ae->extract_path;
93 ### quick check methods ###
94 $ae->is_tar # is it a .tar file?
95 $ae->is_tgz # is it a .tar.gz or .tgz file?
96 $ae->is_gz; # is it a .gz file?
97 $ae->is_zip; # is it a .zip file?
98 $ae->is_bz2; # is it a .bz2 file?
99 $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
100 $ae->is_lzma; # is it a .lzma file?
101 $ae->is_xz; # is it a .xz file?
102 $ae->is_txz; # is it a .tar.xz or .txz file?
104 ### absolute path to the archive you provided ###
107 ### commandline tools, if found ###
108 $ae->bin_tar # path to /bin/tar, if found
109 $ae->bin_gzip # path to /bin/gzip, if found
110 $ae->bin_unzip # path to /bin/unzip, if found
111 $ae->bin_bunzip2 # path to /bin/bunzip2 if found
112 $ae->bin_unlzma # path to /bin/unlzma if found
113 $ae->bin_unxz # path to /bin/unxz if found
117 Archive::Extract is a generic archive extraction mechanism.
119 It allows you to extract any archive file of the type .tar, .tar.gz,
120 .gz, .Z, tar.bz2, .tbz, .bz2, .zip, .xz,, .txz, .tar.xz or .lzma
121 without having to worry how it
122 does so, or use different interfaces for each type by using either
123 perl modules, or commandline tools on your system.
125 See the C<HOW IT WORKS> section further down for details.
130 ### see what /bin/programs are available ###
132 CMD: for my $pgm (qw[tar unzip gzip bunzip2 uncompress unlzma unxz]) {
133 if ( $pgm eq 'unzip' and ( ON_NETBSD or ON_FREEBSD ) ) {
134 local $IPC::Cmd::INSTANCES = 1;
135 ($PROGRAMS->{$pgm}) = grep { ON_NETBSD ? m!/usr/pkg/! : m!/usr/local! } can_run($pgm);
138 if ( $pgm eq 'unzip' and ON_LINUX ) {
139 # Check if 'unzip' is busybox masquerading
140 local $IPC::Cmd::INSTANCES = 1;
141 my $opt = ON_VMS ? '"-Z"' : '-Z';
142 ($PROGRAMS->{$pgm}) = grep { scalar run(command=> [ $_, $opt, '-1' ]) } can_run($pgm);
145 if ( $pgm eq 'tar' and ON_OPENBSD ) {
147 next CMD if $PROGRAMS->{$pgm} = can_run('gtar');
149 $PROGRAMS->{$pgm} = can_run($pgm);
152 ### mapping from types to extractor methods ###
153 my $Mapping = { # binary program # pure perl module
154 is_tgz => { bin => '_untar_bin', pp => '_untar_at' },
155 is_tar => { bin => '_untar_bin', pp => '_untar_at' },
156 is_gz => { bin => '_gunzip_bin', pp => '_gunzip_cz' },
157 is_zip => { bin => '_unzip_bin', pp => '_unzip_az' },
158 is_tbz => { bin => '_untar_bin', pp => '_untar_at' },
159 is_bz2 => { bin => '_bunzip2_bin', pp => '_bunzip2_bz2'},
160 is_Z => { bin => '_uncompress_bin', pp => '_gunzip_cz' },
161 is_lzma => { bin => '_unlzma_bin', pp => '_unlzma_cz' },
162 is_xz => { bin => '_unxz_bin', pp => '_unxz_cz' },
163 is_txz => { bin => '_untar_bin', pp => '_untar_at' },
166 { ### use subs so we re-generate array refs etc for the no-override flags
167 ### if we don't, then we reuse the same arrayref, meaning objects store
170 archive => sub { { required => 1, allow => FILE_EXISTS } },
171 type => sub { { default => '', allow => [ @Types ] } },
172 _error_msg => sub { { no_override => 1, default => [] } },
173 _error_msg_long => sub { { no_override => 1, default => [] } },
176 ### build accessors ###
177 for my $method( keys %$tmpl,
178 qw[_extractor _gunzip_to files extract_path],
183 $self->{$method} = $_[0] if @_;
184 return $self->{$method};
190 =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
192 Creates a new C<Archive::Extract> object based on the archive file you
193 passed it. Automatically determines the type of archive based on the
194 extension, but you can override that by explicitly providing the
197 Valid values for C<type> are:
203 Standard tar files, as produced by, for example, C</bin/tar>.
204 Corresponds to a C<.tar> suffix.
208 Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
209 Corresponds to a C<.tgz> or C<.tar.gz> suffix.
213 Gzip compressed file, as produced by, for example C</bin/gzip>.
214 Corresponds to a C<.gz> suffix.
218 Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
219 Corresponds to a C<.Z> suffix.
223 Zip compressed file, as produced by, for example C</bin/zip>.
224 Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
228 Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
229 Corresponds to a C<.bz2> suffix.
233 Bzip2 compressed tar file, as produced by, for example C</bin/tar -j>.
234 Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
238 Lzma compressed file, as produced by C</bin/lzma>.
239 Corresponds to a C<.lzma> suffix.
243 Xz compressed file, as produced by C</bin/xz>.
244 Corresponds to a C<.xz> suffix.
248 Xz compressed tar file, as produced by, for example C</bin/tar -J>.
249 Corresponds to a C<.txz> or C<.tar.xz> suffix.
253 Returns a C<Archive::Extract> object on success, or false on failure.
262 ### see above why we use subs here and generate the template;
263 ### it's basically to not re-use arrayrefs
264 my %utmpl = map { $_ => $tmpl->{$_}->() } keys %$tmpl;
266 my $parsed = check( \%utmpl, \%hash ) or return;
268 ### make sure we have an absolute path ###
269 my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
271 ### figure out the type, if it wasn't already specified ###
272 unless ( $parsed->{type} ) {
274 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ :
275 $ar =~ /.+?\.gz$/i ? GZ :
276 $ar =~ /.+?\.tar$/i ? TAR :
277 $ar =~ /.+?\.(zip|jar|ear|war|par)$/i ? ZIP :
278 $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ :
279 $ar =~ /.+?\.bz2$/i ? BZ2 :
280 $ar =~ /.+?\.Z$/ ? Z :
281 $ar =~ /.+?\.lzma$/ ? LZMA :
282 $ar =~ /.+?\.(?:txz|tar\.xz)$/i ? TXZ :
283 $ar =~ /.+?\.xz$/ ? XZ :
288 bless $parsed, $class;
290 ### don't know what type of file it is
291 ### XXX this *has* to be an object call, not a package call
292 return $parsed->_error(loc("Cannot determine file type for '%1'",
293 $parsed->{archive} )) unless $parsed->{type};
298 =head2 $ae->extract( [to => '/output/path'] )
300 Extracts the archive represented by the C<Archive::Extract> object to
301 the path of your choice as specified by the C<to> argument. Defaults to
304 Since C<.gz> files never hold a directory, but only a single file; if
305 the C<to> argument is an existing directory, the file is extracted
306 there, with its C<.gz> suffix stripped.
307 If the C<to> argument is not an existing directory, the C<to> argument
308 is understood to be a filename, if the archive type is C<gz>.
309 In the case that you did not specify a C<to> argument, the output
310 file will be the name of the archive file, stripped from its C<.gz>
311 suffix, in the current working directory.
313 C<extract> will try a pure perl solution first, and then fall back to
314 commandline tools if they are available. See the C<GLOBAL VARIABLES>
315 section below on how to alter this behaviour.
317 It will return true on success, and false on failure.
319 On success, it will also set the follow attributes in the object:
323 =item $ae->extract_path
325 This is the directory that the files where extracted to.
329 This is an array ref with the paths of all the files in the archive,
330 relative to the C<to> argument you specified.
331 To get the full path to an extracted file, you would use:
333 File::Spec->catfile( $to, $ae->files->[0] );
335 Note that all files from a tar archive will be in unix format, as per
336 the tar specification.
346 ### reset error messages
347 $self->_error_msg( [] );
348 $self->_error_msg_long( [] );
352 to => { default => '.', store => \$to }
355 check( $tmpl, \%hash ) or return;
357 ### so 'to' could be a file or a dir, depending on whether it's a .gz
358 ### file, or basically anything else.
359 ### so, check that, then act accordingly.
360 ### set an accessor specifically so _gunzip can know what file to extract
364 if( $self->is_gz or $self->is_bz2 or $self->is_Z or $self->is_lzma or $self->is_xz ) {
366 my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z|lzma|xz)$//i;
371 $self->_gunzip_to( basename($cp) );
373 ### then it's a filename
376 $self->_gunzip_to( basename($to) );
379 ### not a foo.gz file
385 ### make the dir if it doesn't exist ###
387 eval { mkpath( $dir ) };
389 return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
393 ### get the current dir, to restore later ###
399 ### chdir to the target dir ###
400 unless( chdir $dir ) {
401 $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
402 $ok = 0; last EXTRACT;
405 ### set files to an empty array ref, so there's always an array
406 ### ref IN the accessor, to avoid errors like:
407 ### Can't use an undefined value as an ARRAY reference at
408 ### ../lib/Archive/Extract.pm line 742. (rt #19815)
411 ### find out the dispatch methods needed for this type of
412 ### archive. Do a $self->is_XXX to figure out the type, then
413 ### get the hashref with bin + pure perl dispatchers.
414 my ($map) = map { $Mapping->{$_} } grep { $self->$_ } keys %$Mapping;
416 ### add pure perl extractor if allowed & add bin extractor if allowed
418 push @methods, $map->{'pp'} if $_ALLOW_PURE_PERL;
419 push @methods, $map->{'bin'} if $_ALLOW_BIN;
421 ### reverse it if we prefer bin extractors
422 @methods = reverse @methods if $PREFER_BIN;
425 for my $method (@methods) {
426 $self->debug( "# Extracting with ->$method\n" );
428 my $rv = $self->$method;
430 ### a positive extraction
431 if( $rv and $rv ne METHOD_NA ) {
432 $self->debug( "# Extraction succeeded\n" );
433 $self->_extractor($method);
436 ### method is not available
437 } elsif ( $rv and $rv eq METHOD_NA ) {
438 $self->debug( "# Extraction method not available\n" );
441 $self->debug( "# Extraction method failed\n" );
446 ### warn something went wrong if we didn't get an extractor
447 unless( $self->_extractor ) {
448 my $diag = $fail ? loc("Extract failed due to errors") :
449 $na ? loc("Extract failed; no extractors available") :
452 $self->_error($diag);
457 ### and chdir back ###
458 unless( chdir $cwd ) {
459 $self->_error(loc("Could not chdir back to start dir '%1': %2'",
470 =head2 $ae->error([BOOL])
472 Returns the last encountered error as string.
473 Pass it a true value to get the C<Carp::longmess()> output instead.
475 =head2 $ae->extract_path
477 This is the directory the archive got extracted to.
478 See C<extract()> for details.
482 This is an array ref holding all the paths from the archive.
483 See C<extract()> for details.
487 This is the full path to the archive file represented by this
488 C<Archive::Extract> object.
492 This is the type of archive represented by this C<Archive::Extract>
493 object. See accessors below for an easier way to use this.
494 See the C<new()> method for details.
498 Returns a list of all known C<types> for C<Archive::Extract>'s
503 sub types { return @Types }
507 Returns true if the file is of type C<.tar.gz>.
508 See the C<new()> method for details.
512 Returns true if the file is of type C<.tar>.
513 See the C<new()> method for details.
517 Returns true if the file is of type C<.gz>.
518 See the C<new()> method for details.
522 Returns true if the file is of type C<.Z>.
523 See the C<new()> method for details.
527 Returns true if the file is of type C<.zip>.
528 See the C<new()> method for details.
532 Returns true if the file is of type C<.lzma>.
533 See the C<new()> method for details.
537 Returns true if the file is of type C<.xz>.
538 See the C<new()> method for details.
542 ### quick check methods ###
543 sub is_tgz { return $_[0]->type eq TGZ }
544 sub is_tar { return $_[0]->type eq TAR }
545 sub is_gz { return $_[0]->type eq GZ }
546 sub is_zip { return $_[0]->type eq ZIP }
547 sub is_tbz { return $_[0]->type eq TBZ }
548 sub is_bz2 { return $_[0]->type eq BZ2 }
549 sub is_Z { return $_[0]->type eq Z }
550 sub is_lzma { return $_[0]->type eq LZMA }
551 sub is_xz { return $_[0]->type eq XZ }
552 sub is_txz { return $_[0]->type eq TXZ }
558 Returns the full path to your tar binary, if found.
562 Returns the full path to your gzip binary, if found
564 =head2 $ae->bin_unzip
566 Returns the full path to your unzip binary, if found
568 =head2 $ae->bin_unlzma
570 Returns the full path to your unlzma binary, if found
574 Returns the full path to your unxz binary, if found
578 ### paths to commandline tools ###
579 sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} }
580 sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} }
581 sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} }
582 sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
583 sub bin_uncompress { return $PROGRAMS->{'uncompress'}
584 if $PROGRAMS->{'uncompress'} }
585 sub bin_unlzma { return $PROGRAMS->{'unlzma'} if $PROGRAMS->{'unlzma'} }
586 sub bin_unxz { return $PROGRAMS->{'unxz'} if $PROGRAMS->{'unxz'} }
588 =head2 $bool = $ae->have_old_bunzip2
590 Older versions of C</bin/bunzip2>, from before the C<bunzip2 1.0> release,
591 require all archive names to end in C<.bz2> or it will not extract
592 them. This method checks if you have a recent version of C<bunzip2>
593 that allows any extension, or an older one that doesn't.
597 sub have_old_bunzip2 {
600 ### no bunzip2? no old bunzip2 either :)
601 return unless $self->bin_bunzip2;
603 ### if we can't run this, we can't be sure if it's too old or not
604 ### XXX stupid stupid stupid bunzip2 doesn't understand --version
605 ### is not a request to extract data:
606 ### $ bunzip2 --version
607 ### bzip2, a block-sorting file compressor. Version 1.0.2, 30-Dec-2001.
609 ### bunzip2: I won't read compressed data from a terminal.
610 ### bunzip2: For help, type: `bunzip2 --help'.
615 ### double hateful: bunzip2 --version also hangs if input is a pipe
616 ### See #32370: Archive::Extract will hang if stdin is a pipe [+PATCH]
617 ### So, we have to provide *another* argument which is a fake filename,
618 ### just so it wont try to read from stdin to print its version..
620 ### Even if the file exists, it won't clobber or change it.
623 command => [$self->bin_bunzip2, '--version', 'NoSuchFile'],
629 return unless $buffer;
631 my ($version) = $buffer =~ /version \s+ (\d+)/ix;
633 return 1 if $version < 1;
637 #################################
641 #################################
643 ### annoying issue with (gnu) tar on win32, as illustrated by this
644 ### bug: https://rt.cpan.org/Ticket/Display.html?id=40138
645 ### which shows that (gnu) tar will interpret a file name with a :
646 ### in it as a remote file name, so C:\tmp\foo.txt is interpreted
647 ### as a remote shell, and the extract fails.
649 if( ON_WIN32 and my $cmd = __PACKAGE__->bin_tar ) {
651 ### if this is gnu tar we are running, we need to use --force-local
652 push @ExtraTarFlags, '--force-local' if `$cmd --version` =~ /gnu tar/i;
656 ### use /bin/tar to extract ###
660 ### check for /bin/tar ###
661 ### check for /bin/gzip if we need it ###
662 ### if any of the binaries are not available, return NA
663 { my $diag = !$self->bin_tar ?
664 loc("No '%1' program found", '/bin/tar') :
665 $self->is_tgz && !$self->bin_gzip ?
666 loc("No '%1' program found", '/bin/gzip') :
667 $self->is_tbz && !$self->bin_bunzip2 ?
668 loc("No '%1' program found", '/bin/bunzip2') :
669 $self->is_txz && !$self->bin_unxz ?
670 loc("No '%1' program found", '/bin/unxz') :
674 $self->_error( $diag );
679 ### XXX figure out how to make IPC::Run do this in one call --
680 ### currently i don't know how to get output of a command after a pipe
681 ### trapped in a scalar. Mailed barries about this 5th of june 2004.
683 ### see what command we should run, based on whether
684 ### it's a .tgz or .tar
686 ### GNU tar can't handled VMS filespecs, but VMSTAR can handle Unix filespecs.
687 my $archive = $self->archive;
688 $archive = VMS::Filespec::unixify($archive) if ON_VMS;
690 ### XXX solaris tar and bsdtar are having different outputs
691 ### depending whether you run with -x or -t
692 ### compensate for this insanity by running -t first, then -x
694 $self->is_tgz ? [$self->bin_gzip, '-cdf', $archive, '|',
695 $self->bin_tar, '-tf', '-'] :
696 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $archive, '|',
697 $self->bin_tar, '-tf', '-'] :
698 $self->is_txz ? [$self->bin_unxz, '-cd', $archive, '|',
699 $self->bin_tar, '-tf', '-'] :
700 [$self->bin_tar, @ExtraTarFlags, '-tf', $archive];
703 ### newer versions of 'tar' (1.21 and up) now print record size
704 ### to STDERR as well if v OR t is given (used to be both). This
705 ### is a 'feature' according to the changelog, so we must now only
706 ### inspect STDOUT, otherwise, failures like these occur:
707 ### http://www.cpantesters.org/cpan/report/3230366
709 my @out = run( command => $cmd,
713 ### command was unsuccessful
715 return $self->_error(loc(
716 "Error listing contents of archive '%1': %2",
717 $archive, $buffer ));
720 ### no buffers available?
721 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
722 $self->_error( $self->_no_buffer_files( $archive ) );
725 ### if we're on solaris we /might/ be using /bin/tar, which has
726 ### a weird output format... we might also be using
727 ### /usr/local/bin/tar, which is gnu tar, which is perfectly
728 ### fine... so we have to do some guessing here =/
729 my @files = map { chomp;
731 : (m|^ x \s+ # 'xtract' -- sigh
732 (.+?), # the actual file name
733 \s+ [\d,.]+ \s bytes,
734 \s+ [\d,.]+ \s tape \s blocks
737 ### only STDOUT, see above. Sometimes, extra whitespace
738 ### is present, so make sure we only pick lines with
740 } grep { length } map { split $/, $_ } join '', @{$out[3]};
742 ### store the files that are in the archive ###
743 $self->files(\@files);
747 ### now actually extract it ###
749 $self->is_tgz ? [$self->bin_gzip, '-cdf', $archive, '|',
750 $self->bin_tar, '-xf', '-'] :
751 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $archive, '|',
752 $self->bin_tar, '-xf', '-'] :
753 $self->is_txz ? [$self->bin_unxz, '-cd', $archive, '|',
754 $self->bin_tar, '-xf', '-'] :
755 [$self->bin_tar, @ExtraTarFlags, '-xf', $archive];
758 unless( scalar run( command => $cmd,
762 return $self->_error(loc("Error extracting archive '%1': %2",
763 $archive, $buffer ));
766 ### we might not have them, due to lack of buffers
768 ### now that we've extracted, figure out where we extracted to
769 my $dir = $self->__get_extract_dir( $self->files );
771 ### store the extraction dir ###
772 $self->extract_path( $dir );
776 ### we got here, no error happened
782 ### use archive::tar to extract ###
786 ### Loading Archive::Tar is going to set it to 1, so make it local
787 ### within this block, starting with its initial value. Whatever
788 ### Achive::Tar does will be undone when we return.
790 ### Also, later, set $Archive::Tar::WARN to $Archive::Extract::WARN
791 ### so users don't have to even think about this variable. If they
792 ### do, they still get their set value outside of this call.
793 local $Archive::Tar::WARN = $Archive::Tar::WARN;
795 ### we definitely need Archive::Tar, so load that first
796 { my $use_list = { 'Archive::Tar' => '0.0' };
798 unless( can_load( modules => $use_list ) ) {
800 $self->_error(loc("You do not have '%1' installed - " .
801 "Please install it as soon as possible.",
808 ### we might pass it a filehandle if it's a .tbz file..
809 my $fh_to_read = $self->archive;
811 ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
812 ### if A::T's version is 0.99 or higher
813 if( $self->is_tgz ) {
814 my $use_list = { 'Compress::Zlib' => '0.0' };
815 $use_list->{ 'IO::Zlib' } = '0.0'
816 if $Archive::Tar::VERSION >= '0.99';
818 unless( can_load( modules => $use_list ) ) {
819 my $which = join '/', sort keys %$use_list;
822 "You do not have '%1' installed - Please ".
823 "install it as soon as possible.", $which)
829 } elsif ( $self->is_tbz ) {
830 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
831 unless( can_load( modules => $use_list ) ) {
833 "You do not have '%1' installed - Please " .
834 "install it as soon as possible.",
835 'IO::Uncompress::Bunzip2')
841 my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
842 return $self->_error(loc("Unable to open '%1': %2",
844 $IO::Uncompress::Bunzip2::Bunzip2Error));
847 } elsif ( $self->is_txz ) {
848 my $use_list = { 'IO::Uncompress::UnXz' => '0.0' };
849 unless( can_load( modules => $use_list ) ) {
851 "You do not have '%1' installed - Please " .
852 "install it as soon as possible.",
853 'IO::Uncompress::UnXz')
859 my $xz = IO::Uncompress::UnXz->new( $self->archive ) or
860 return $self->_error(loc("Unable to open '%1': %2",
862 $IO::Uncompress::UnXz::UnXzError));
869 ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've
870 ### localized $Archive::Tar::WARN already.
871 $Archive::Tar::WARN = $Archive::Extract::WARN;
873 ### only tell it it's compressed if it's a .tgz, as we give it a file
874 ### handle if it's a .tbz
875 my @read = ( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) );
877 ### for version of Archive::Tar > 1.04
878 local $Archive::Tar::CHOWN = 0;
880 ### use the iterator if we can. it's a feature of A::T 1.40 and up
881 if ( $_ALLOW_TAR_ITER && Archive::Tar->can( 'iter' ) ) {
884 unless ( $next = Archive::Tar->iter( @read ) ) {
885 return $self->_error(loc(
886 "Unable to read '%1': %2", $self->archive,
887 $Archive::Tar::error));
890 while ( my $file = $next->() ) {
891 push @files, $file->full_path;
893 $file->extract or return $self->_error(loc(
894 "Unable to read '%1': %2",
896 $Archive::Tar::error));
899 ### older version, read the archive into memory
902 my $tar = Archive::Tar->new();
904 unless( $tar->read( @read ) ) {
905 return $self->_error(loc("Unable to read '%1': %2",
906 $self->archive, $Archive::Tar::error));
909 ### workaround to prevent Archive::Tar from setting uid, which
910 ### is a potential security hole. -autrijus
911 ### have to do it here, since A::T needs to be /loaded/ first ###
912 { no strict 'refs'; local $^W;
914 ### older versions of archive::tar <= 0.23
915 *Archive::Tar::chown = sub {};
918 { local $^W; # quell 'splice() offset past end of array' warnings
919 # on older versions of A::T
921 ### older archive::tar always returns $self, return value
922 ### slightly fux0r3d because of it.
923 $tar->extract or return $self->_error(loc(
924 "Unable to extract '%1': %2",
925 $self->archive, $Archive::Tar::error ));
928 @files = $tar->list_files;
932 my $dir = $self->__get_extract_dir( \@files );
934 ### store the files that are in the archive ###
935 $self->files(\@files);
937 ### store the extraction dir ###
938 $self->extract_path( $dir );
940 ### check if the dir actually appeared ###
941 return 1 if -d $self->extract_path;
943 ### no dir, we failed ###
944 return $self->_error(loc("Unable to extract '%1': %2",
945 $self->archive, $Archive::Tar::error ));
948 #################################
952 #################################
957 ### check for /bin/gzip -- we need it ###
958 unless( $self->bin_gzip ) {
959 $self->_error(loc("No '%1' program found", '/bin/gzip'));
963 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
964 return $self->_error(loc("Could not open '%1' for writing: %2",
965 $self->_gunzip_to, $! ));
967 my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
970 unless( scalar run( command => $cmd,
974 return $self->_error(loc("Unable to gunzip '%1': %2",
975 $self->archive, $buffer));
978 ### no buffers available?
979 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
980 $self->_error( $self->_no_buffer_content( $self->archive ) );
983 $self->_print($fh, $buffer) if defined $buffer;
987 ### set what files where extract, and where they went ###
988 $self->files( [$self->_gunzip_to] );
989 $self->extract_path( File::Spec->rel2abs(cwd()) );
997 my $use_list = { 'Compress::Zlib' => '0.0' };
998 unless( can_load( modules => $use_list ) ) {
999 $self->_error(loc("You do not have '%1' installed - Please " .
1000 "install it as soon as possible.", 'Compress::Zlib'));
1004 my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
1005 return $self->_error(loc("Unable to open '%1': %2",
1006 $self->archive, $Compress::Zlib::gzerrno));
1008 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1009 return $self->_error(loc("Could not open '%1' for writing: %2",
1010 $self->_gunzip_to, $! ));
1013 $self->_print($fh, $buffer) while $gz->gzread($buffer) > 0;
1016 ### set what files where extract, and where they went ###
1017 $self->files( [$self->_gunzip_to] );
1018 $self->extract_path( File::Spec->rel2abs(cwd()) );
1023 #################################
1027 #################################
1029 sub _uncompress_bin {
1032 ### check for /bin/gzip -- we need it ###
1033 unless( $self->bin_uncompress ) {
1034 $self->_error(loc("No '%1' program found", '/bin/uncompress'));
1038 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1039 return $self->_error(loc("Could not open '%1' for writing: %2",
1040 $self->_gunzip_to, $! ));
1042 my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
1045 unless( scalar run( command => $cmd,
1047 buffer => \$buffer )
1049 return $self->_error(loc("Unable to uncompress '%1': %2",
1050 $self->archive, $buffer));
1053 ### no buffers available?
1054 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1055 $self->_error( $self->_no_buffer_content( $self->archive ) );
1058 $self->_print($fh, $buffer) if defined $buffer;
1062 ### set what files where extract, and where they went ###
1063 $self->files( [$self->_gunzip_to] );
1064 $self->extract_path( File::Spec->rel2abs(cwd()) );
1070 #################################
1074 #################################
1080 ### check for /bin/gzip if we need it ###
1081 unless( $self->bin_unzip ) {
1082 $self->_error(loc("No '%1' program found", '/bin/unzip'));
1086 ### first, get the files.. it must be 2 different commands with 'unzip' :(
1087 { ### on VMS, capital letter options have to be quoted. This is
1088 ### reported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
1089 ### Subject: [patch@31735]Archive Extract fix on VMS.
1090 my $opt = ON_VMS ? '"-Z"' : '-Z';
1091 my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
1094 unless( scalar run( command => $cmd,
1096 buffer => \$buffer )
1098 return $self->_error(loc("Unable to unzip '%1': %2",
1099 $self->archive, $buffer));
1102 ### no buffers available?
1103 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1104 $self->_error( $self->_no_buffer_files( $self->archive ) );
1107 ### Annoyingly, pesky MSWin32 can either have 'native' tools
1108 ### which have \r\n line endings or Cygwin-based tools which
1109 ### have \n line endings. Jan Dubois suggested using this fix
1110 local $/ = ON_WIN32 ? qr/\r?\n/ : "\n";
1111 $self->files( [split $/, $buffer] );
1115 ### now, extract the archive ###
1116 { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
1119 unless( scalar run( command => $cmd,
1121 buffer => \$buffer )
1123 return $self->_error(loc("Unable to unzip '%1': %2",
1124 $self->archive, $buffer));
1127 if( scalar @{$self->files} ) {
1128 my $files = $self->files;
1129 my $dir = $self->__get_extract_dir( $files );
1131 $self->extract_path( $dir );
1141 my $use_list = { 'Archive::Zip' => '0.0' };
1142 unless( can_load( modules => $use_list ) ) {
1143 $self->_error(loc("You do not have '%1' installed - Please " .
1144 "install it as soon as possible.", 'Archive::Zip'));
1148 my $zip = Archive::Zip->new();
1150 unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
1151 return $self->_error(loc("Unable to read '%1'", $self->archive));
1157 ### Address: #43278: Explicitly tell Archive::Zip where to put the files:
1158 ### "In my BackPAN indexing, Archive::Zip was extracting things
1159 ### in my script's directory instead of the current working directory.
1160 ### I traced this back through Archive::Zip::_asLocalName which
1161 ### eventually calls File::Spec::Win32::rel2abs which on Windows might
1162 ### call Cwd::getdcwd. getdcwd returns the wrong directory in my
1163 ### case, even though I think I'm on the same drive.
1165 ### To fix this, I pass the optional second argument to
1166 ### extractMember using the cwd from Archive::Extract." --bdfoy
1168 ## store cwd() before looping; calls to cwd() can be expensive, and
1169 ### it won't change during the loop
1170 my $extract_dir = cwd();
1172 ### have to extract every member individually ###
1173 for my $member ($zip->members) {
1174 push @files, $member->{fileName};
1176 ### file to extract to, to avoid the above problem
1177 my $to = File::Spec->catfile( $extract_dir, $member->{fileName} );
1179 unless( $zip->extractMember($member, $to) == &Archive::Zip::AZ_OK ) {
1180 return $self->_error(loc("Extraction of '%1' from '%2' failed",
1181 $member->{fileName}, $self->archive ));
1185 my $dir = $self->__get_extract_dir( \@files );
1187 ### set what files where extract, and where they went ###
1188 $self->files( \@files );
1189 $self->extract_path( File::Spec->rel2abs($dir) );
1194 sub __get_extract_dir {
1196 my $files = shift || [];
1198 return unless scalar @$files;
1201 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
1202 my($dir,$pos) = @$aref;
1204 ### add a catdir(), so that any trailing slashes get
1205 ### take care of (removed)
1206 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
1207 ### which was the problem in bug #23999
1208 my $res = -d $files->[$pos]
1209 ? File::Spec->catdir( $files->[$pos], '' )
1210 : File::Spec->catdir( dirname( $files->[$pos] ) );
1215 ### if the first and last dir don't match, make sure the
1216 ### dirname is not set wrongly
1219 ### dirs are the same, so we know for sure what the extract dir is
1220 if( $dir1 eq $dir2 ) {
1223 ### dirs are different.. do they share the base dir?
1224 ### if so, use that, if not, fall back to '.'
1226 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1227 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
1229 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1232 return File::Spec->rel2abs( $dir );
1235 #################################
1239 #################################
1244 ### check for /bin/gzip -- we need it ###
1245 unless( $self->bin_bunzip2 ) {
1246 $self->_error(loc("No '%1' program found", '/bin/bunzip2'));
1250 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1251 return $self->_error(loc("Could not open '%1' for writing: %2",
1252 $self->_gunzip_to, $! ));
1254 ### guard against broken bunzip2. See ->have_old_bunzip2()
1256 if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1257 return $self->_error(loc("Your bunzip2 version is too old and ".
1258 "can only extract files ending in '%1'",
1262 my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
1265 unless( scalar run( command => $cmd,
1267 buffer => \$buffer )
1269 return $self->_error(loc("Unable to bunzip2 '%1': %2",
1270 $self->archive, $buffer));
1273 ### no buffers available?
1274 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1275 $self->_error( $self->_no_buffer_content( $self->archive ) );
1278 $self->_print($fh, $buffer) if defined $buffer;
1282 ### set what files where extract, and where they went ###
1283 $self->files( [$self->_gunzip_to] );
1284 $self->extract_path( File::Spec->rel2abs(cwd()) );
1289 ### using cz2, the compact versions... this we use mainly in archive::tar
1291 # sub _bunzip2_cz1 {
1294 # my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1295 # unless( can_load( modules => $use_list ) ) {
1296 # return $self->_error(loc("You do not have '%1' installed - Please " .
1297 # "install it as soon as possible.",
1298 # 'IO::Uncompress::Bunzip2'));
1301 # my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1302 # return $self->_error(loc("Unable to open '%1': %2",
1304 # $IO::Uncompress::Bunzip2::Bunzip2Error));
1306 # my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1307 # return $self->_error(loc("Could not open '%1' for writing: %2",
1308 # $self->_gunzip_to, $! ));
1311 # $fh->print($buffer) while $bz->read($buffer) > 0;
1314 # ### set what files where extract, and where they went ###
1315 # $self->files( [$self->_gunzip_to] );
1316 # $self->extract_path( File::Spec->rel2abs(cwd()) );
1324 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1325 unless( can_load( modules => $use_list ) ) {
1326 $self->_error(loc("You do not have '%1' installed - Please " .
1327 "install it as soon as possible.",
1328 'IO::Uncompress::Bunzip2'));
1332 IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1333 or return $self->_error(loc("Unable to uncompress '%1': %2",
1335 $IO::Uncompress::Bunzip2::Bunzip2Error));
1337 ### set what files where extract, and where they went ###
1338 $self->files( [$self->_gunzip_to] );
1339 $self->extract_path( File::Spec->rel2abs(cwd()) );
1344 #################################
1348 #################################
1353 ### check for /bin/unxz -- we need it ###
1354 unless( $self->bin_unxz ) {
1355 $self->_error(loc("No '%1' program found", '/bin/unxz'));
1359 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1360 return $self->_error(loc("Could not open '%1' for writing: %2",
1361 $self->_gunzip_to, $! ));
1363 my $cmd = [ $self->bin_unxz, '-cdf', $self->archive ];
1366 unless( scalar run( command => $cmd,
1368 buffer => \$buffer )
1370 return $self->_error(loc("Unable to unxz '%1': %2",
1371 $self->archive, $buffer));
1374 ### no buffers available?
1375 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1376 $self->_error( $self->_no_buffer_content( $self->archive ) );
1379 $self->_print($fh, $buffer) if defined $buffer;
1383 ### set what files where extract, and where they went ###
1384 $self->files( [$self->_gunzip_to] );
1385 $self->extract_path( File::Spec->rel2abs(cwd()) );
1393 my $use_list = { 'IO::Uncompress::UnXz' => '0.0' };
1394 unless( can_load( modules => $use_list ) ) {
1395 $self->_error(loc("You do not have '%1' installed - Please " .
1396 "install it as soon as possible.",
1397 'IO::Uncompress::UnXz'));
1401 IO::Uncompress::UnXz::unxz($self->archive => $self->_gunzip_to)
1402 or return $self->_error(loc("Unable to uncompress '%1': %2",
1404 $IO::Uncompress::UnXz::UnXzError));
1406 ### set what files where extract, and where they went ###
1407 $self->files( [$self->_gunzip_to] );
1408 $self->extract_path( File::Spec->rel2abs(cwd()) );
1414 #################################
1418 #################################
1423 ### check for /bin/unlzma -- we need it ###
1424 unless( $self->bin_unlzma ) {
1425 $self->_error(loc("No '%1' program found", '/bin/unlzma'));
1429 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1430 return $self->_error(loc("Could not open '%1' for writing: %2",
1431 $self->_gunzip_to, $! ));
1433 my $cmd = [ $self->bin_unlzma, '-c', $self->archive ];
1436 unless( scalar run( command => $cmd,
1438 buffer => \$buffer )
1440 return $self->_error(loc("Unable to unlzma '%1': %2",
1441 $self->archive, $buffer));
1444 ### no buffers available?
1445 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1446 $self->_error( $self->_no_buffer_content( $self->archive ) );
1449 $self->_print($fh, $buffer) if defined $buffer;
1453 ### set what files where extract, and where they went ###
1454 $self->files( [$self->_gunzip_to] );
1455 $self->extract_path( File::Spec->rel2abs(cwd()) );
1463 my $use_list1 = { 'IO::Uncompress::UnLzma' => '0.0' };
1464 my $use_list2 = { 'Compress::unLZMA' => '0.0' };
1466 if (can_load( modules => $use_list1 ) ) {
1467 IO::Uncompress::UnLzma::unlzma($self->archive => $self->_gunzip_to)
1468 or return $self->_error(loc("Unable to uncompress '%1': %2",
1470 $IO::Uncompress::UnLzma::UnLzmaError));
1472 elsif (can_load( modules => $use_list2 ) ) {
1474 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1475 return $self->_error(loc("Could not open '%1' for writing: %2",
1476 $self->_gunzip_to, $! ));
1479 $buffer = Compress::unLZMA::uncompressfile( $self->archive );
1480 unless ( defined $buffer ) {
1481 return $self->_error(loc("Could not unlzma '%1': %2",
1482 $self->archive, $@));
1485 $self->_print($fh, $buffer) if defined $buffer;
1490 $self->_error(loc("You do not have '%1' or '%2' installed - Please " .
1491 "install it as soon as possible.", 'Compress::unLZMA', 'IO::Uncompress::UnLzma'));
1495 ### set what files where extract, and where they went ###
1496 $self->files( [$self->_gunzip_to] );
1497 $self->extract_path( File::Spec->rel2abs(cwd()) );
1502 #################################
1506 #################################
1508 # For printing binaries that avoids interfering globals
1513 local( $\, $", $, ) = ( undef, ' ', '' );
1514 return print $fh @_;
1520 my $lerror = Carp::longmess($error);
1522 push @{$self->_error_msg}, $error;
1523 push @{$self->_error_msg_long}, $lerror;
1525 ### set $Archive::Extract::WARN to 0 to disable printing
1528 carp $DEBUG ? $lerror : $error;
1537 ### make sure we have a fallback aref
1540 ? $self->_error_msg_long
1544 return join $/, @$aref;
1547 =head2 debug( MESSAGE )
1549 This method outputs MESSAGE to the default filehandle if C<$DEBUG> is
1550 true. It's a small method, but it's here if you'd like to subclass it
1551 so you can so something else with any debugging output.
1555 ### this is really a stub for subclassing
1557 return unless $DEBUG;
1562 sub _no_buffer_files {
1564 my $file = shift or return;
1565 return loc("No buffer captured, unable to tell ".
1566 "extracted files or extraction dir for '%1'", $file);
1569 sub _no_buffer_content {
1571 my $file = shift or return;
1572 return loc("No buffer captured, unable to get content for '%1'", $file);
1580 C<Archive::Extract> tries first to determine what type of archive you
1581 are passing it, by inspecting its suffix. It does not do this by using
1582 Mime magic, or something related. See C<CAVEATS> below.
1584 Once it has determined the file type, it knows which extraction methods
1585 it can use on the archive. It will try a perl solution first, then fall
1586 back to a commandline tool if that fails. If that also fails, it will
1587 return false, indicating it was unable to extract the archive.
1588 See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1592 =head2 File Extensions
1594 C<Archive::Extract> trusts on the extension of the archive to determine
1595 what type it is, and what extractor methods therefore can be used. If
1596 your archives do not have any of the extensions as described in the
1597 C<new()> method, you will have to specify the type explicitly, or
1598 C<Archive::Extract> will not be able to extract the archive for you.
1600 =head2 Supporting Very Large Files
1602 C<Archive::Extract> can use either pure perl modules or command line
1603 programs under the hood. Some of the pure perl modules (like
1604 C<Archive::Tar> and Compress::unLZMA) take the entire contents of the archive into memory,
1605 which may not be feasible on your system. Consider setting the global
1606 variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1607 the use of command line programs and won't consume so much memory.
1609 See the C<GLOBAL VARIABLES> section below for details.
1611 =head2 Bunzip2 support of arbitrary extensions.
1613 Older versions of C</bin/bunzip2> do not support arbitrary file
1614 extensions and insist on a C<.bz2> suffix. Although we do our best
1615 to guard against this, if you experience a bunzip2 error, it may
1616 be related to this. For details, please see the C<have_old_bunzip2>
1619 =head1 GLOBAL VARIABLES
1621 =head2 $Archive::Extract::DEBUG
1623 Set this variable to C<true> to have all calls to command line tools
1624 be printed out, including all their output.
1625 This also enables C<Carp::longmess> errors, instead of the regular
1628 Good for tracking down why things don't work with your particular
1631 Defaults to C<false>.
1633 =head2 $Archive::Extract::WARN
1635 This variable controls whether errors encountered internally by
1636 C<Archive::Extract> should be C<carp>'d or not.
1638 Set to false to silence warnings. Inspect the output of the C<error()>
1639 method manually to see what went wrong.
1641 Defaults to C<true>.
1643 =head2 $Archive::Extract::PREFER_BIN
1645 This variables controls whether C<Archive::Extract> should prefer the
1646 use of perl modules, or commandline tools to extract archives.
1648 Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1650 Defaults to C<false>.
1652 =head1 TODO / CAVEATS
1656 =item Mime magic support
1658 Maybe this module should use something like C<File::Type> to determine
1659 the type, rather than blindly trust the suffix.
1663 Currently, C<Archive::Extract> does a C<chdir> to the extraction dir before
1664 extraction, and a C<chdir> back again after. This is not necessarily
1665 thread safe. See C<rt.cpan.org> bug C<#45671> for details.
1671 Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.orgE<gt>.
1675 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1679 This library is free software; you may redistribute and/or modify it
1680 under the same terms as Perl itself.
1685 # c-indentation-style: bsd
1687 # indent-tabs-mode: nil
1689 # vim: expandtab shiftwidth=4: