cpan/Archive-Extract/lib/Archive/Extract.pm

   1 package Archive::Extract;
   2 use if $] > 5.017, 'deprecate';
   3
   4 use strict;
   5
   6 use Cwd                         qw[cwd chdir];
   7 use Carp                        qw[carp];
   8 use IPC::Cmd                    qw[run can_run];
   9 use FileHandle;
  10 use File::Path                  qw[mkpath];
  11 use File::Spec;
  12 use File::Basename              qw[dirname basename];
  13 use Params::Check               qw[check];
  14 use Module::Load::Conditional   qw[can_load check_install];
  15 use Locale::Maketext::Simple    Style => 'gettext';
  16
  17 ### solaris has silly /bin/tar output ###
  18 use constant ON_SOLARIS     => $^O eq 'solaris' ? 1 : 0;
  19 use constant ON_NETBSD      => $^O eq 'netbsd' ? 1 : 0;
  20 use constant ON_OPENBSD     => $^O eq 'openbsd' ? 1 : 0;
  21 use constant ON_FREEBSD     => $^O eq 'freebsd' ? 1 : 0;
  22 use constant ON_LINUX       => $^O eq 'linux' ? 1 : 0;
  23 use constant FILE_EXISTS    => sub { -e $_[0] ? 1 : 0 };
  24
  25 ### VMS may require quoting upper case command options
  26 use constant ON_VMS         => $^O eq 'VMS' ? 1 : 0;
  27
  28 ### Windows needs special treatment of Tar options
  29 use constant ON_WIN32       => $^O eq 'MSWin32' ? 1 : 0;
  30
  31 ### we can't use this extraction method, because of missing
  32 ### modules/binaries:
  33 use constant METHOD_NA      => [];
  34
  35 ### If these are changed, update @TYPES and the new() POD
  36 use constant TGZ            => 'tgz';
  37 use constant TAR            => 'tar';
  38 use constant GZ             => 'gz';
  39 use constant ZIP            => 'zip';
  40 use constant BZ2            => 'bz2';
  41 use constant TBZ            => 'tbz';
  42 use constant Z              => 'Z';
  43 use constant LZMA           => 'lzma';
  44 use constant XZ             => 'xz';
  45 use constant TXZ            => 'txz';
  46
  47 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG
  48             $_ALLOW_BIN $_ALLOW_PURE_PERL $_ALLOW_TAR_ITER
  49          ];
  50
  51 $VERSION            = '0.64';
  52 $PREFER_BIN         = 0;
  53 $WARN               = 1;
  54 $DEBUG              = 0;
  55 $_ALLOW_PURE_PERL   = 1;    # allow pure perl extractors
  56 $_ALLOW_BIN         = 1;    # allow binary extractors
  57 $_ALLOW_TAR_ITER        = 1;    # try to use Archive::Tar->iter if available
  58
  59 # same as all constants
  60 my @Types           = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA, XZ, TXZ );
  61
  62 local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
  63
  64 =pod
  65
  66 =head1 NAME
  67
  68 Archive::Extract - A generic archive extracting mechanism
  69
  70 =head1 SYNOPSIS
  71
  72     use Archive::Extract;
  73
  74     ### build an Archive::Extract object ###
  75     my $ae = Archive::Extract->new( archive => 'foo.tgz' );
  76
  77     ### extract to cwd() ###
  78     my $ok = $ae->extract;
  79
  80     ### extract to /tmp ###
  81     my $ok = $ae->extract( to => '/tmp' );
  82
  83     ### what if something went wrong?
  84     my $ok = $ae->extract or die $ae->error;
  85
  86     ### files from the archive ###
  87     my $files   = $ae->files;
  88
  89     ### dir that was extracted to ###
  90     my $outdir  = $ae->extract_path;
  91
  92
  93     ### quick check methods ###
  94     $ae->is_tar     # is it a .tar file?
  95     $ae->is_tgz     # is it a .tar.gz or .tgz file?
  96     $ae->is_gz;     # is it a .gz file?
  97     $ae->is_zip;    # is it a .zip file?
  98     $ae->is_bz2;    # is it a .bz2 file?
  99     $ae->is_tbz;    # is it a .tar.bz2 or .tbz file?
 100     $ae->is_lzma;   # is it a .lzma file?
 101     $ae->is_xz;     # is it a .xz file?
 102     $ae->is_txz;    # is it a .tar.xz or .txz file?
 103
 104     ### absolute path to the archive you provided ###
 105     $ae->archive;
 106
 107     ### commandline tools, if found ###
 108     $ae->bin_tar     # path to /bin/tar, if found
 109     $ae->bin_gzip    # path to /bin/gzip, if found
 110     $ae->bin_unzip   # path to /bin/unzip, if found
 111     $ae->bin_bunzip2 # path to /bin/bunzip2 if found
 112     $ae->bin_unlzma  # path to /bin/unlzma if found
 113     $ae->bin_unxz    # path to /bin/unxz if found
 114
 115 =head1 DESCRIPTION
 116
 117 Archive::Extract is a generic archive extraction mechanism.
 118
 119 It allows you to extract any archive file of the type .tar, .tar.gz,
 120 .gz, .Z, tar.bz2, .tbz, .bz2, .zip, .xz,, .txz, .tar.xz or .lzma
 121 without having to worry how it
 122 does so, or use different interfaces for each type by using either
 123 perl modules, or commandline tools on your system.
 124
 125 See the C<HOW IT WORKS> section further down for details.
 126
 127 =cut
 128
 129
 130 ### see what /bin/programs are available ###
 131 $PROGRAMS = {};
 132 CMD: for my $pgm (qw[tar unzip gzip bunzip2 uncompress unlzma unxz]) {
 133     if ( $pgm eq 'unzip' and ( ON_NETBSD or ON_FREEBSD ) ) {
 134       local $IPC::Cmd::INSTANCES = 1;
 135       ($PROGRAMS->{$pgm}) = grep { ON_NETBSD ? m!/usr/pkg/! : m!/usr/local! } can_run($pgm);
 136       next CMD;
 137     }
 138     if ( $pgm eq 'unzip' and ON_LINUX ) {
 139       # Check if 'unzip' is busybox masquerading
 140       local $IPC::Cmd::INSTANCES = 1;
 141       my $opt = ON_VMS ? '"-Z"' : '-Z';
 142       ($PROGRAMS->{$pgm}) = grep { scalar run(command=> [ $_, $opt, '-1' ]) } can_run($pgm);
 143       next CMD;
 144     }
 145     if ( $pgm eq 'tar' and ON_OPENBSD ) {
 146       # try gtar first
 147       next CMD if $PROGRAMS->{$pgm} = can_run('gtar');
 148     }
 149     $PROGRAMS->{$pgm} = can_run($pgm);
 150 }
 151
 152 ### mapping from types to extractor methods ###
 153 my $Mapping = {  # binary program           # pure perl module
 154     is_tgz  => { bin => '_untar_bin',       pp => '_untar_at'   },
 155     is_tar  => { bin => '_untar_bin',       pp => '_untar_at'   },
 156     is_gz   => { bin => '_gunzip_bin',      pp => '_gunzip_cz'  },
 157     is_zip  => { bin => '_unzip_bin',       pp => '_unzip_az'   },
 158     is_tbz  => { bin => '_untar_bin',       pp => '_untar_at'   },
 159     is_bz2  => { bin => '_bunzip2_bin',     pp => '_bunzip2_bz2'},
 160     is_Z    => { bin => '_uncompress_bin',  pp => '_gunzip_cz'  },
 161     is_lzma => { bin => '_unlzma_bin',      pp => '_unlzma_cz'  },
 162     is_xz   => { bin => '_unxz_bin',        pp => '_unxz_cz'    },
 163     is_txz  => { bin => '_untar_bin',       pp => '_untar_at'   },
 164 };
 165
 166 {   ### use subs so we re-generate array refs etc for the no-override flags
 167     ### if we don't, then we reuse the same arrayref, meaning objects store
 168     ### previous errors
 169     my $tmpl = {
 170         archive         => sub { { required => 1, allow => FILE_EXISTS }    },
 171         type            => sub { { default => '', allow => [ @Types ] }     },
 172         _error_msg      => sub { { no_override => 1, default => [] }        },
 173         _error_msg_long => sub { { no_override => 1, default => [] }        },
 174     };
 175
 176     ### build accessors ###
 177     for my $method( keys %$tmpl,
 178                     qw[_extractor _gunzip_to files extract_path],
 179     ) {
 180         no strict 'refs';
 181         *$method = sub {
 182                         my $self = shift;
 183                         $self->{$method} = $_[0] if @_;
 184                         return $self->{$method};
 185                     }
 186     }
 187
 188 =head1 METHODS
 189
 190 =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
 191
 192 Creates a new C<Archive::Extract> object based on the archive file you
 193 passed it. Automatically determines the type of archive based on the
 194 extension, but you can override that by explicitly providing the
 195 C<type> argument.
 196
 197 Valid values for C<type> are:
 198
 199 =over 4
 200
 201 =item tar
 202
 203 Standard tar files, as produced by, for example, C</bin/tar>.
 204 Corresponds to a C<.tar> suffix.
 205
 206 =item tgz
 207
 208 Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
 209 Corresponds to a C<.tgz> or C<.tar.gz> suffix.
 210
 211 =item gz
 212
 213 Gzip compressed file, as produced by, for example C</bin/gzip>.
 214 Corresponds to a C<.gz> suffix.
 215
 216 =item Z
 217
 218 Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
 219 Corresponds to a C<.Z> suffix.
 220
 221 =item zip
 222
 223 Zip compressed file, as produced by, for example C</bin/zip>.
 224 Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
 225
 226 =item bz2
 227
 228 Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
 229 Corresponds to a C<.bz2> suffix.
 230
 231 =item tbz
 232
 233 Bzip2 compressed tar file, as produced by, for example C</bin/tar -j>.
 234 Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
 235
 236 =item lzma
 237
 238 Lzma compressed file, as produced by C</bin/lzma>.
 239 Corresponds to a C<.lzma> suffix.
 240
 241 =item xz
 242
 243 Xz compressed file, as produced by C</bin/xz>.
 244 Corresponds to a C<.xz> suffix.
 245
 246 =item txz
 247
 248 Xz compressed tar file, as produced by, for example C</bin/tar -J>.
 249 Corresponds to a C<.txz> or C<.tar.xz> suffix.
 250
 251 =back
 252
 253 Returns a C<Archive::Extract> object on success, or false on failure.
 254
 255 =cut
 256
 257     ### constructor ###
 258     sub new {
 259         my $class   = shift;
 260         my %hash    = @_;
 261
 262         ### see above why we use subs here and generate the template;
 263         ### it's basically to not re-use arrayrefs
 264         my %utmpl   = map { $_ => $tmpl->{$_}->() } keys %$tmpl;
 265
 266         my $parsed = check( \%utmpl, \%hash ) or return;
 267
 268         ### make sure we have an absolute path ###
 269         my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
 270
 271         ### figure out the type, if it wasn't already specified ###
 272         unless ( $parsed->{type} ) {
 273             $parsed->{type} =
 274                 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i         ? TGZ   :
 275                 $ar =~ /.+?\.gz$/i                      ? GZ    :
 276                 $ar =~ /.+?\.tar$/i                     ? TAR   :
 277                 $ar =~ /.+?\.(zip|jar|ear|war|par)$/i   ? ZIP   :
 278                 $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i     ? TBZ   :
 279                 $ar =~ /.+?\.bz2$/i                     ? BZ2   :
 280                 $ar =~ /.+?\.Z$/                        ? Z     :
 281                 $ar =~ /.+?\.lzma$/                     ? LZMA  :
 282                 $ar =~ /.+?\.(?:txz|tar\.xz)$/i         ? TXZ   :
 283                 $ar =~ /.+?\.xz$/                       ? XZ    :
 284                 '';
 285
 286         }
 287
 288         bless $parsed, $class;
 289
 290         ### don't know what type of file it is
 291         ### XXX this *has* to be an object call, not a package call
 292         return $parsed->_error(loc("Cannot determine file type for '%1'",
 293                                 $parsed->{archive} )) unless $parsed->{type};
 294         return $parsed;
 295     }
 296 }
 297
 298 =head2 $ae->extract( [to => '/output/path'] )
 299
 300 Extracts the archive represented by the C<Archive::Extract> object to
 301 the path of your choice as specified by the C<to> argument. Defaults to
 302 C<cwd()>.
 303
 304 Since C<.gz> files never hold a directory, but only a single file; if
 305 the C<to> argument is an existing directory, the file is extracted
 306 there, with its C<.gz> suffix stripped.
 307 If the C<to> argument is not an existing directory, the C<to> argument
 308 is understood to be a filename, if the archive type is C<gz>.
 309 In the case that you did not specify a C<to> argument, the output
 310 file will be the name of the archive file, stripped from its C<.gz>
 311 suffix, in the current working directory.
 312
 313 C<extract> will try a pure perl solution first, and then fall back to
 314 commandline tools if they are available. See the C<GLOBAL VARIABLES>
 315 section below on how to alter this behaviour.
 316
 317 It will return true on success, and false on failure.
 318
 319 On success, it will also set the follow attributes in the object:
 320
 321 =over 4
 322
 323 =item $ae->extract_path
 324
 325 This is the directory that the files where extracted to.
 326
 327 =item $ae->files
 328
 329 This is an array ref with the paths of all the files in the archive,
 330 relative to the C<to> argument you specified.
 331 To get the full path to an extracted file, you would use:
 332
 333     File::Spec->catfile( $to, $ae->files->[0] );
 334
 335 Note that all files from a tar archive will be in unix format, as per
 336 the tar specification.
 337
 338 =back
 339
 340 =cut
 341
 342 sub extract {
 343     my $self = shift;
 344     my %hash = @_;
 345
 346     ### reset error messages
 347     $self->_error_msg( [] );
 348     $self->_error_msg_long( [] );
 349
 350     my $to;
 351     my $tmpl = {
 352         to  => { default => '.', store => \$to }
 353     };
 354
 355     check( $tmpl, \%hash ) or return;
 356
 357     ### so 'to' could be a file or a dir, depending on whether it's a .gz
 358     ### file, or basically anything else.
 359     ### so, check that, then act accordingly.
 360     ### set an accessor specifically so _gunzip can know what file to extract
 361     ### to.
 362     my $dir;
 363     {   ### a foo.gz file
 364         if( $self->is_gz or $self->is_bz2 or $self->is_Z or $self->is_lzma or $self->is_xz ) {
 365
 366             my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z|lzma|xz)$//i;
 367
 368             ### to is a dir?
 369             if ( -d $to ) {
 370                 $dir = $to;
 371                 $self->_gunzip_to( basename($cp) );
 372
 373             ### then it's a filename
 374             } else {
 375                 $dir = dirname($to);
 376                 $self->_gunzip_to( basename($to) );
 377             }
 378
 379         ### not a foo.gz file
 380         } else {
 381             $dir = $to;
 382         }
 383     }
 384
 385     ### make the dir if it doesn't exist ###
 386     unless( -d $dir ) {
 387         eval { mkpath( $dir ) };
 388
 389         return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
 390             if $@;
 391     }
 392
 393     ### get the current dir, to restore later ###
 394     my $cwd = cwd();
 395
 396     my $ok = 1;
 397     EXTRACT: {
 398
 399         ### chdir to the target dir ###
 400         unless( chdir $dir ) {
 401             $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
 402             $ok = 0; last EXTRACT;
 403         }
 404
 405         ### set files to an empty array ref, so there's always an array
 406         ### ref IN the accessor, to avoid errors like:
 407         ### Can't use an undefined value as an ARRAY reference at
 408         ### ../lib/Archive/Extract.pm line 742. (rt #19815)
 409         $self->files( [] );
 410
 411         ### find out the dispatch methods needed for this type of
 412         ### archive. Do a $self->is_XXX to figure out the type, then
 413         ### get the hashref with bin + pure perl dispatchers.
 414         my ($map) = map { $Mapping->{$_} } grep { $self->$_ } keys %$Mapping;
 415
 416         ### add pure perl extractor if allowed & add bin extractor if allowed
 417         my @methods;
 418         push @methods, $map->{'pp'}  if $_ALLOW_PURE_PERL;
 419         push @methods, $map->{'bin'} if $_ALLOW_BIN;
 420
 421         ### reverse it if we prefer bin extractors
 422         @methods = reverse @methods if $PREFER_BIN;
 423
 424         my($na, $fail);
 425         for my $method (@methods) {
 426             $self->debug( "# Extracting with ->$method\n" );
 427
 428             my $rv = $self->$method;
 429
 430             ### a positive extraction
 431             if( $rv and $rv ne METHOD_NA ) {
 432                 $self->debug( "# Extraction succeeded\n" );
 433                 $self->_extractor($method);
 434                 last;
 435
 436             ### method is not available
 437             } elsif ( $rv and $rv eq METHOD_NA ) {
 438                 $self->debug( "# Extraction method not available\n" );
 439                 $na++;
 440             } else {
 441                 $self->debug( "# Extraction method failed\n" );
 442                 $fail++;
 443             }
 444         }
 445
 446         ### warn something went wrong if we didn't get an extractor
 447         unless( $self->_extractor ) {
 448             my $diag = $fail ? loc("Extract failed due to errors") :
 449                        $na   ? loc("Extract failed; no extractors available") :
 450                        '';
 451
 452             $self->_error($diag);
 453             $ok = 0;
 454         }
 455     }
 456
 457     ### and chdir back ###
 458     unless( chdir $cwd ) {
 459         $self->_error(loc("Could not chdir back to start dir '%1': %2'",
 460                             $cwd, $!));
 461     }
 462
 463     return $ok;
 464 }
 465
 466 =pod
 467
 468 =head1 ACCESSORS
 469
 470 =head2 $ae->error([BOOL])
 471
 472 Returns the last encountered error as string.
 473 Pass it a true value to get the C<Carp::longmess()> output instead.
 474
 475 =head2 $ae->extract_path
 476
 477 This is the directory the archive got extracted to.
 478 See C<extract()> for details.
 479
 480 =head2 $ae->files
 481
 482 This is an array ref holding all the paths from the archive.
 483 See C<extract()> for details.
 484
 485 =head2 $ae->archive
 486
 487 This is the full path to the archive file represented by this
 488 C<Archive::Extract> object.
 489
 490 =head2 $ae->type
 491
 492 This is the type of archive represented by this C<Archive::Extract>
 493 object. See accessors below for an easier way to use this.
 494 See the C<new()> method for details.
 495
 496 =head2 $ae->types
 497
 498 Returns a list of all known C<types> for C<Archive::Extract>'s
 499 C<new> method.
 500
 501 =cut
 502
 503 sub types { return @Types }
 504
 505 =head2 $ae->is_tgz
 506
 507 Returns true if the file is of type C<.tar.gz>.
 508 See the C<new()> method for details.
 509
 510 =head2 $ae->is_tar
 511
 512 Returns true if the file is of type C<.tar>.
 513 See the C<new()> method for details.
 514
 515 =head2 $ae->is_gz
 516
 517 Returns true if the file is of type C<.gz>.
 518 See the C<new()> method for details.
 519
 520 =head2 $ae->is_Z
 521
 522 Returns true if the file is of type C<.Z>.
 523 See the C<new()> method for details.
 524
 525 =head2 $ae->is_zip
 526
 527 Returns true if the file is of type C<.zip>.
 528 See the C<new()> method for details.
 529
 530 =head2 $ae->is_lzma
 531
 532 Returns true if the file is of type C<.lzma>.
 533 See the C<new()> method for details.
 534
 535 =head2 $ae->is_xz
 536
 537 Returns true if the file is of type C<.xz>.
 538 See the C<new()> method for details.
 539
 540 =cut
 541
 542 ### quick check methods ###
 543 sub is_tgz  { return $_[0]->type eq TGZ }
 544 sub is_tar  { return $_[0]->type eq TAR }
 545 sub is_gz   { return $_[0]->type eq GZ  }
 546 sub is_zip  { return $_[0]->type eq ZIP }
 547 sub is_tbz  { return $_[0]->type eq TBZ }
 548 sub is_bz2  { return $_[0]->type eq BZ2 }
 549 sub is_Z    { return $_[0]->type eq Z   }
 550 sub is_lzma { return $_[0]->type eq LZMA }
 551 sub is_xz   { return $_[0]->type eq XZ   }
 552 sub is_txz  { return $_[0]->type eq TXZ }
 553
 554 =pod
 555
 556 =head2 $ae->bin_tar
 557
 558 Returns the full path to your tar binary, if found.
 559
 560 =head2 $ae->bin_gzip
 561
 562 Returns the full path to your gzip binary, if found
 563
 564 =head2 $ae->bin_unzip
 565
 566 Returns the full path to your unzip binary, if found
 567
 568 =head2 $ae->bin_unlzma
 569
 570 Returns the full path to your unlzma binary, if found
 571
 572 =head2 $ae->bin_unxz
 573
 574 Returns the full path to your unxz binary, if found
 575
 576 =cut
 577
 578 ### paths to commandline tools ###
 579 sub bin_gzip        { return $PROGRAMS->{'gzip'}    if $PROGRAMS->{'gzip'}  }
 580 sub bin_unzip       { return $PROGRAMS->{'unzip'}   if $PROGRAMS->{'unzip'} }
 581 sub bin_tar         { return $PROGRAMS->{'tar'}     if $PROGRAMS->{'tar'}   }
 582 sub bin_bunzip2     { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
 583 sub bin_uncompress  { return $PROGRAMS->{'uncompress'}
 584                                                  if $PROGRAMS->{'uncompress'} }
 585 sub bin_unlzma      { return $PROGRAMS->{'unlzma'}  if $PROGRAMS->{'unlzma'} }
 586 sub bin_unxz        { return $PROGRAMS->{'unxz'}    if $PROGRAMS->{'unxz'} }
 587
 588 =head2 $bool = $ae->have_old_bunzip2
 589
 590 Older versions of C</bin/bunzip2>, from before the C<bunzip2 1.0> release,
 591 require all archive names to end in C<.bz2> or it will not extract
 592 them. This method checks if you have a recent version of C<bunzip2>
 593 that allows any extension, or an older one that doesn't.
 594
 595 =cut
 596
 597 sub have_old_bunzip2 {
 598     my $self = shift;
 599
 600     ### no bunzip2? no old bunzip2 either :)
 601     return unless $self->bin_bunzip2;
 602
 603     ### if we can't run this, we can't be sure if it's too old or not
 604     ### XXX stupid stupid stupid bunzip2 doesn't understand --version
 605     ### is not a request to extract data:
 606     ### $ bunzip2 --version
 607     ### bzip2, a block-sorting file compressor.  Version 1.0.2, 30-Dec-2001.
 608     ### [...]
 609     ### bunzip2: I won't read compressed data from a terminal.
 610     ### bunzip2: For help, type: `bunzip2 --help'.
 611     ### $ echo $?
 612     ### 1
 613     ### HATEFUL!
 614
 615     ### double hateful: bunzip2 --version also hangs if input is a pipe
 616     ### See #32370: Archive::Extract will hang if stdin is a pipe [+PATCH]
 617     ### So, we have to provide *another* argument which is a fake filename,
 618     ### just so it wont try to read from stdin to print its version..
 619     ### *sigh*
 620     ### Even if the file exists, it won't clobber or change it.
 621     my $buffer;
 622     scalar run(
 623          command => [$self->bin_bunzip2, '--version', 'NoSuchFile'],
 624          verbose => 0,
 625          buffer  => \$buffer
 626     );
 627
 628     ### no output
 629     return unless $buffer;
 630
 631     my ($version) = $buffer =~ /version \s+ (\d+)/ix;
 632
 633     return 1 if $version < 1;
 634     return;
 635 }
 636
 637 #################################
 638 #
 639 # Untar code
 640 #
 641 #################################
 642
 643 ### annoying issue with (gnu) tar on win32, as illustrated by this
 644 ### bug: https://rt.cpan.org/Ticket/Display.html?id=40138
 645 ### which shows that (gnu) tar will interpret a file name with a :
 646 ### in it as a remote file name, so C:\tmp\foo.txt is interpreted
 647 ### as a remote shell, and the extract fails.
 648 {   my @ExtraTarFlags;
 649     if( ON_WIN32 and my $cmd = __PACKAGE__->bin_tar ) {
 650
 651         ### if this is gnu tar we are running, we need to use --force-local
 652         push @ExtraTarFlags, '--force-local' if `$cmd --version` =~ /gnu tar/i;
 653     }
 654
 655
 656     ### use /bin/tar to extract ###
 657     sub _untar_bin {
 658         my $self = shift;
 659
 660         ### check for /bin/tar ###
 661         ### check for /bin/gzip if we need it ###
 662         ### if any of the binaries are not available, return NA
 663         {   my $diag =  !$self->bin_tar ?
 664                             loc("No '%1' program found", '/bin/tar') :
 665                         $self->is_tgz && !$self->bin_gzip ?
 666                             loc("No '%1' program found", '/bin/gzip') :
 667                         $self->is_tbz && !$self->bin_bunzip2 ?
 668                             loc("No '%1' program found", '/bin/bunzip2') :
 669                         $self->is_txz && !$self->bin_unxz ?
 670                             loc("No '%1' program found", '/bin/unxz') :
 671                         '';
 672
 673             if( $diag ) {
 674                 $self->_error( $diag );
 675                 return METHOD_NA;
 676             }
 677         }
 678
 679         ### XXX figure out how to make IPC::Run do this in one call --
 680         ### currently i don't know how to get output of a command after a pipe
 681         ### trapped in a scalar. Mailed barries about this 5th of june 2004.
 682
 683         ### see what command we should run, based on whether
 684         ### it's a .tgz or .tar
 685
 686         ### GNU tar can't handled VMS filespecs, but VMSTAR can handle Unix filespecs.
 687         my $archive = $self->archive;
 688         $archive = VMS::Filespec::unixify($archive) if ON_VMS;
 689
 690         ### XXX solaris tar and bsdtar are having different outputs
 691         ### depending whether you run with -x or -t
 692         ### compensate for this insanity by running -t first, then -x
 693         {    my $cmd =
 694                 $self->is_tgz ? [$self->bin_gzip, '-cdf', $archive, '|',
 695                                  $self->bin_tar, '-tf', '-'] :
 696                 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $archive, '|',
 697                                  $self->bin_tar, '-tf', '-'] :
 698                 $self->is_txz ? [$self->bin_unxz, '-cd', $archive, '|',
 699                                  $self->bin_tar, '-tf', '-'] :
 700                 [$self->bin_tar, @ExtraTarFlags, '-tf', $archive];
 701
 702             ### run the command
 703             ### newer versions of 'tar' (1.21 and up) now print record size
 704             ### to STDERR as well if v OR t is given (used to be both). This
 705             ### is a 'feature' according to the changelog, so we must now only
 706             ### inspect STDOUT, otherwise, failures like these occur:
 707             ### http://www.cpantesters.org/cpan/report/3230366
 708             my $buffer  = '';
 709             my @out     = run(  command => $cmd,
 710                                 buffer  => \$buffer,
 711                                 verbose => $DEBUG );
 712
 713             ### command was unsuccessful
 714             unless( $out[0] ) {
 715                 return $self->_error(loc(
 716                                 "Error listing contents of archive '%1': %2",
 717                                 $archive, $buffer ));
 718             }
 719
 720             ### no buffers available?
 721             if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 722                 $self->_error( $self->_no_buffer_files( $archive ) );
 723
 724             } else {
 725                 ### if we're on solaris we /might/ be using /bin/tar, which has
 726                 ### a weird output format... we might also be using
 727                 ### /usr/local/bin/tar, which is gnu tar, which is perfectly
 728                 ### fine... so we have to do some guessing here =/
 729                 my @files = map { chomp;
 730                               !ON_SOLARIS ? $_
 731                                           : (m|^ x \s+  # 'xtract' -- sigh
 732                                                 (.+?),  # the actual file name
 733                                                 \s+ [\d,.]+ \s bytes,
 734                                                 \s+ [\d,.]+ \s tape \s blocks
 735                                             |x ? $1 : $_);
 736
 737                         ### only STDOUT, see above. Sometimes, extra whitespace
 738                         ### is present, so make sure we only pick lines with
 739                         ### a length
 740                         } grep { length } map { split $/, $_ } join '', @{$out[3]};
 741
 742                 ### store the files that are in the archive ###
 743                 $self->files(\@files);
 744             }
 745         }
 746
 747         ### now actually extract it ###
 748         {   my $cmd =
 749                 $self->is_tgz ? [$self->bin_gzip, '-cdf', $archive, '|',
 750                                  $self->bin_tar, '-xf', '-'] :
 751                 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $archive, '|',
 752                                  $self->bin_tar, '-xf', '-'] :
 753                 $self->is_txz ? [$self->bin_unxz, '-cd', $archive, '|',
 754                                  $self->bin_tar, '-xf', '-'] :
 755                 [$self->bin_tar, @ExtraTarFlags, '-xf', $archive];
 756
 757             my $buffer = '';
 758             unless( scalar run( command => $cmd,
 759                                 buffer  => \$buffer,
 760                                 verbose => $DEBUG )
 761             ) {
 762                 return $self->_error(loc("Error extracting archive '%1': %2",
 763                                 $archive, $buffer ));
 764             }
 765
 766             ### we might not have them, due to lack of buffers
 767             if( $self->files ) {
 768                 ### now that we've extracted, figure out where we extracted to
 769                 my $dir = $self->__get_extract_dir( $self->files );
 770
 771                 ### store the extraction dir ###
 772                 $self->extract_path( $dir );
 773             }
 774         }
 775
 776         ### we got here, no error happened
 777         return 1;
 778     }
 779 }
 780
 781
 782 ### use archive::tar to extract ###
 783 sub _untar_at {
 784     my $self = shift;
 785
 786     ### Loading Archive::Tar is going to set it to 1, so make it local
 787     ### within this block, starting with its initial value. Whatever
 788     ### Achive::Tar does will be undone when we return.
 789     ###
 790     ### Also, later, set $Archive::Tar::WARN to $Archive::Extract::WARN
 791     ### so users don't have to even think about this variable. If they
 792     ### do, they still get their set value outside of this call.
 793     local $Archive::Tar::WARN = $Archive::Tar::WARN;
 794
 795     ### we definitely need Archive::Tar, so load that first
 796     {   my $use_list = { 'Archive::Tar' => '0.0' };
 797
 798         unless( can_load( modules => $use_list ) ) {
 799
 800             $self->_error(loc("You do not have '%1' installed - " .
 801                               "Please install it as soon as possible.",
 802                               'Archive::Tar'));
 803
 804             return METHOD_NA;
 805         }
 806     }
 807
 808     ### we might pass it a filehandle if it's a .tbz file..
 809     my $fh_to_read = $self->archive;
 810
 811     ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
 812     ### if A::T's version is 0.99 or higher
 813     if( $self->is_tgz ) {
 814         my $use_list = { 'Compress::Zlib' => '0.0' };
 815            $use_list->{ 'IO::Zlib' } = '0.0'
 816                 if $Archive::Tar::VERSION >= '0.99';
 817
 818         unless( can_load( modules => $use_list ) ) {
 819             my $which = join '/', sort keys %$use_list;
 820
 821             $self->_error(loc(
 822                 "You do not have '%1' installed - Please ".
 823                 "install it as soon as possible.", $which)
 824             );
 825
 826             return METHOD_NA;
 827         }
 828
 829     } elsif ( $self->is_tbz ) {
 830         my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
 831         unless( can_load( modules => $use_list ) ) {
 832             $self->_error(loc(
 833                 "You do not have '%1' installed - Please " .
 834                 "install it as soon as possible.",
 835                 'IO::Uncompress::Bunzip2')
 836             );
 837
 838             return METHOD_NA;
 839         }
 840
 841         my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
 842             return $self->_error(loc("Unable to open '%1': %2",
 843                             $self->archive,
 844                             $IO::Uncompress::Bunzip2::Bunzip2Error));
 845
 846         $fh_to_read = $bz;
 847     } elsif ( $self->is_txz ) {
 848         my $use_list = { 'IO::Uncompress::UnXz' => '0.0' };
 849         unless( can_load( modules => $use_list ) ) {
 850             $self->_error(loc(
 851                 "You do not have '%1' installed - Please " .
 852                 "install it as soon as possible.",
 853                 'IO::Uncompress::UnXz')
 854             );
 855
 856             return METHOD_NA;
 857         }
 858
 859         my $xz = IO::Uncompress::UnXz->new( $self->archive ) or
 860             return $self->_error(loc("Unable to open '%1': %2",
 861                             $self->archive,
 862                             $IO::Uncompress::UnXz::UnXzError));
 863
 864         $fh_to_read = $xz;
 865     }
 866
 867     my @files;
 868     {
 869         ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've
 870         ### localized $Archive::Tar::WARN already.
 871         $Archive::Tar::WARN = $Archive::Extract::WARN;
 872
 873         ### only tell it it's compressed if it's a .tgz, as we give it a file
 874         ### handle if it's a .tbz
 875         my @read = ( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) );
 876
 877         ### for version of Archive::Tar > 1.04
 878         local $Archive::Tar::CHOWN = 0;
 879
 880         ### use the iterator if we can. it's a feature of A::T 1.40 and up
 881         if ( $_ALLOW_TAR_ITER && Archive::Tar->can( 'iter' ) ) {
 882
 883             my $next;
 884             unless ( $next = Archive::Tar->iter( @read ) ) {
 885                 return $self->_error(loc(
 886                             "Unable to read '%1': %2", $self->archive,
 887                             $Archive::Tar::error));
 888             }
 889
 890             while ( my $file = $next->() ) {
 891                 push @files, $file->full_path;
 892
 893                 $file->extract or return $self->_error(loc(
 894                         "Unable to read '%1': %2",
 895                         $self->archive,
 896                         $Archive::Tar::error));
 897             }
 898
 899         ### older version, read the archive into memory
 900         } else {
 901
 902             my $tar = Archive::Tar->new();
 903
 904             unless( $tar->read( @read ) ) {
 905                 return $self->_error(loc("Unable to read '%1': %2",
 906                             $self->archive, $Archive::Tar::error));
 907             }
 908
 909             ### workaround to prevent Archive::Tar from setting uid, which
 910             ### is a potential security hole. -autrijus
 911             ### have to do it here, since A::T needs to be /loaded/ first ###
 912             {   no strict 'refs'; local $^W;
 913
 914                 ### older versions of archive::tar <= 0.23
 915                 *Archive::Tar::chown = sub {};
 916             }
 917
 918             {   local $^W;  # quell 'splice() offset past end of array' warnings
 919                             # on older versions of A::T
 920
 921                 ### older archive::tar always returns $self, return value
 922                 ### slightly fux0r3d because of it.
 923                 $tar->extract or return $self->_error(loc(
 924                         "Unable to extract '%1': %2",
 925                         $self->archive, $Archive::Tar::error ));
 926             }
 927
 928             @files = $tar->list_files;
 929         }
 930     }
 931
 932     my $dir = $self->__get_extract_dir( \@files );
 933
 934     ### store the files that are in the archive ###
 935     $self->files(\@files);
 936
 937     ### store the extraction dir ###
 938     $self->extract_path( $dir );
 939
 940     ### check if the dir actually appeared ###
 941     return 1 if -d $self->extract_path;
 942
 943     ### no dir, we failed ###
 944     return $self->_error(loc("Unable to extract '%1': %2",
 945                                 $self->archive, $Archive::Tar::error ));
 946 }
 947
 948 #################################
 949 #
 950 # Gunzip code
 951 #
 952 #################################
 953
 954 sub _gunzip_bin {
 955     my $self = shift;
 956
 957     ### check for /bin/gzip -- we need it ###
 958     unless( $self->bin_gzip ) {
 959         $self->_error(loc("No '%1' program found", '/bin/gzip'));
 960         return METHOD_NA;
 961     }
 962
 963     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
 964         return $self->_error(loc("Could not open '%1' for writing: %2",
 965                             $self->_gunzip_to, $! ));
 966
 967     my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
 968
 969     my $buffer;
 970     unless( scalar run( command => $cmd,
 971                         verbose => $DEBUG,
 972                         buffer  => \$buffer )
 973     ) {
 974         return $self->_error(loc("Unable to gunzip '%1': %2",
 975                                     $self->archive, $buffer));
 976     }
 977
 978     ### no buffers available?
 979     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
 980         $self->_error( $self->_no_buffer_content( $self->archive ) );
 981     }
 982
 983     $self->_print($fh, $buffer) if defined $buffer;
 984
 985     close $fh;
 986
 987     ### set what files where extract, and where they went ###
 988     $self->files( [$self->_gunzip_to] );
 989     $self->extract_path( File::Spec->rel2abs(cwd()) );
 990
 991     return 1;
 992 }
 993
 994 sub _gunzip_cz {
 995     my $self = shift;
 996
 997     my $use_list = { 'Compress::Zlib' => '0.0' };
 998     unless( can_load( modules => $use_list ) ) {
 999         $self->_error(loc("You do not have '%1' installed - Please " .
1000                     "install it as soon as possible.", 'Compress::Zlib'));
1001         return METHOD_NA;
1002     }
1003
1004     my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
1005                 return $self->_error(loc("Unable to open '%1': %2",
1006                             $self->archive, $Compress::Zlib::gzerrno));
1007
1008     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1009         return $self->_error(loc("Could not open '%1' for writing: %2",
1010                             $self->_gunzip_to, $! ));
1011
1012     my $buffer;
1013     $self->_print($fh, $buffer) while $gz->gzread($buffer) > 0;
1014     $fh->close;
1015
1016     ### set what files where extract, and where they went ###
1017     $self->files( [$self->_gunzip_to] );
1018     $self->extract_path( File::Spec->rel2abs(cwd()) );
1019
1020     return 1;
1021 }
1022
1023 #################################
1024 #
1025 # Uncompress code
1026 #
1027 #################################
1028
1029 sub _uncompress_bin {
1030     my $self = shift;
1031
1032     ### check for /bin/gzip -- we need it ###
1033     unless( $self->bin_uncompress ) {
1034         $self->_error(loc("No '%1' program found", '/bin/uncompress'));
1035         return METHOD_NA;
1036     }
1037
1038     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1039         return $self->_error(loc("Could not open '%1' for writing: %2",
1040                             $self->_gunzip_to, $! ));
1041
1042     my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
1043
1044     my $buffer;
1045     unless( scalar run( command => $cmd,
1046                         verbose => $DEBUG,
1047                         buffer  => \$buffer )
1048     ) {
1049         return $self->_error(loc("Unable to uncompress '%1': %2",
1050                                     $self->archive, $buffer));
1051     }
1052
1053     ### no buffers available?
1054     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1055         $self->_error( $self->_no_buffer_content( $self->archive ) );
1056     }
1057
1058     $self->_print($fh, $buffer) if defined $buffer;
1059
1060     close $fh;
1061
1062     ### set what files where extract, and where they went ###
1063     $self->files( [$self->_gunzip_to] );
1064     $self->extract_path( File::Spec->rel2abs(cwd()) );
1065
1066     return 1;
1067 }
1068
1069
1070 #################################
1071 #
1072 # Unzip code
1073 #
1074 #################################
1075
1076
1077 sub _unzip_bin {
1078     my $self = shift;
1079
1080     ### check for /bin/gzip if we need it ###
1081     unless( $self->bin_unzip ) {
1082         $self->_error(loc("No '%1' program found", '/bin/unzip'));
1083         return METHOD_NA;
1084     }
1085
1086     ### first, get the files.. it must be 2 different commands with 'unzip' :(
1087     {   ### on VMS, capital letter options have to be quoted. This is
1088         ### reported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
1089         ### Subject: [patch@31735]Archive Extract fix on VMS.
1090         my $opt = ON_VMS ? '"-Z"' : '-Z';
1091         my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
1092
1093         my $buffer = '';
1094         unless( scalar run( command => $cmd,
1095                             verbose => $DEBUG,
1096                             buffer  => \$buffer )
1097         ) {
1098             return $self->_error(loc("Unable to unzip '%1': %2",
1099                                         $self->archive, $buffer));
1100         }
1101
1102         ### no buffers available?
1103         if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1104             $self->_error( $self->_no_buffer_files( $self->archive ) );
1105
1106         } else {
1107             ### Annoyingly, pesky MSWin32 can either have 'native' tools
1108             ### which have \r\n line endings or Cygwin-based tools which
1109             ### have \n line endings. Jan Dubois suggested using this fix
1110             local $/ = ON_WIN32 ? qr/\r?\n/ : "\n";
1111             $self->files( [split $/, $buffer] );
1112         }
1113     }
1114
1115     ### now, extract the archive ###
1116     {   my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
1117
1118         my $buffer;
1119         unless( scalar run( command => $cmd,
1120                             verbose => $DEBUG,
1121                             buffer  => \$buffer )
1122         ) {
1123             return $self->_error(loc("Unable to unzip '%1': %2",
1124                                         $self->archive, $buffer));
1125         }
1126
1127         if( scalar @{$self->files} ) {
1128             my $files   = $self->files;
1129             my $dir     = $self->__get_extract_dir( $files );
1130
1131             $self->extract_path( $dir );
1132         }
1133     }
1134
1135     return 1;
1136 }
1137
1138 sub _unzip_az {
1139     my $self = shift;
1140
1141     my $use_list = { 'Archive::Zip' => '0.0' };
1142     unless( can_load( modules => $use_list ) ) {
1143         $self->_error(loc("You do not have '%1' installed - Please " .
1144                       "install it as soon as possible.", 'Archive::Zip'));
1145         return METHOD_NA;
1146     }
1147
1148     my $zip = Archive::Zip->new();
1149
1150     unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
1151         return $self->_error(loc("Unable to read '%1'", $self->archive));
1152     }
1153
1154     my @files;
1155
1156
1157     ### Address: #43278: Explicitly tell Archive::Zip where to put the files:
1158     ### "In my BackPAN indexing, Archive::Zip was extracting things
1159     ### in my script's directory instead of the current working directory.
1160     ### I traced this back through Archive::Zip::_asLocalName which
1161     ### eventually calls File::Spec::Win32::rel2abs which on Windows might
1162     ### call Cwd::getdcwd. getdcwd returns the wrong directory in my
1163     ### case, even though I think I'm on the same drive.
1164     ###
1165     ### To fix this, I pass the optional second argument to
1166     ### extractMember using the cwd from Archive::Extract." --bdfoy
1167
1168     ## store cwd() before looping; calls to cwd() can be expensive, and
1169     ### it won't change during the loop
1170     my $extract_dir = cwd();
1171
1172     ### have to extract every member individually ###
1173     for my $member ($zip->members) {
1174         push @files, $member->{fileName};
1175
1176         ### file to extract to, to avoid the above problem
1177         my $to = File::Spec->catfile( $extract_dir, $member->{fileName} );
1178
1179         unless( $zip->extractMember($member, $to) == &Archive::Zip::AZ_OK ) {
1180             return $self->_error(loc("Extraction of '%1' from '%2' failed",
1181                         $member->{fileName}, $self->archive ));
1182         }
1183     }
1184
1185     my $dir = $self->__get_extract_dir( \@files );
1186
1187     ### set what files where extract, and where they went ###
1188     $self->files( \@files );
1189     $self->extract_path( File::Spec->rel2abs($dir) );
1190
1191     return 1;
1192 }
1193
1194 sub __get_extract_dir {
1195     my $self    = shift;
1196     my $files   = shift || [];
1197
1198     return unless scalar @$files;
1199
1200     my($dir1, $dir2);
1201     for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
1202         my($dir,$pos) = @$aref;
1203
1204         ### add a catdir(), so that any trailing slashes get
1205         ### take care of (removed)
1206         ### also, a catdir() normalises './dir/foo' to 'dir/foo';
1207         ### which was the problem in bug #23999
1208         my $res = -d $files->[$pos]
1209                     ? File::Spec->catdir( $files->[$pos], '' )
1210                     : File::Spec->catdir( dirname( $files->[$pos] ) );
1211
1212         $$dir = $res;
1213     }
1214
1215     ### if the first and last dir don't match, make sure the
1216     ### dirname is not set wrongly
1217     my $dir;
1218
1219     ### dirs are the same, so we know for sure what the extract dir is
1220     if( $dir1 eq $dir2 ) {
1221         $dir = $dir1;
1222
1223     ### dirs are different.. do they share the base dir?
1224     ### if so, use that, if not, fall back to '.'
1225     } else {
1226         my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1227         my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
1228
1229         $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1230     }
1231
1232     return File::Spec->rel2abs( $dir );
1233 }
1234
1235 #################################
1236 #
1237 # Bunzip2 code
1238 #
1239 #################################
1240
1241 sub _bunzip2_bin {
1242     my $self = shift;
1243
1244     ### check for /bin/gzip -- we need it ###
1245     unless( $self->bin_bunzip2 ) {
1246         $self->_error(loc("No '%1' program found", '/bin/bunzip2'));
1247         return METHOD_NA;
1248     }
1249
1250     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1251         return $self->_error(loc("Could not open '%1' for writing: %2",
1252                             $self->_gunzip_to, $! ));
1253
1254     ### guard against broken bunzip2. See ->have_old_bunzip2()
1255     ### for details
1256     if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1257         return $self->_error(loc("Your bunzip2 version is too old and ".
1258                                  "can only extract files ending in '%1'",
1259                                  '.bz2'));
1260     }
1261
1262     my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
1263
1264     my $buffer;
1265     unless( scalar run( command => $cmd,
1266                         verbose => $DEBUG,
1267                         buffer  => \$buffer )
1268     ) {
1269         return $self->_error(loc("Unable to bunzip2 '%1': %2",
1270                                     $self->archive, $buffer));
1271     }
1272
1273     ### no buffers available?
1274     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1275         $self->_error( $self->_no_buffer_content( $self->archive ) );
1276     }
1277
1278     $self->_print($fh, $buffer) if defined $buffer;
1279
1280     close $fh;
1281
1282     ### set what files where extract, and where they went ###
1283     $self->files( [$self->_gunzip_to] );
1284     $self->extract_path( File::Spec->rel2abs(cwd()) );
1285
1286     return 1;
1287 }
1288
1289 ### using cz2, the compact versions... this we use mainly in archive::tar
1290 ### extractor..
1291 # sub _bunzip2_cz1 {
1292 #     my $self = shift;
1293 #
1294 #     my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1295 #     unless( can_load( modules => $use_list ) ) {
1296 #         return $self->_error(loc("You do not have '%1' installed - Please " .
1297 #                         "install it as soon as possible.",
1298 #                         'IO::Uncompress::Bunzip2'));
1299 #     }
1300 #
1301 #     my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1302 #                 return $self->_error(loc("Unable to open '%1': %2",
1303 #                             $self->archive,
1304 #                             $IO::Uncompress::Bunzip2::Bunzip2Error));
1305 #
1306 #     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1307 #         return $self->_error(loc("Could not open '%1' for writing: %2",
1308 #                             $self->_gunzip_to, $! ));
1309 #
1310 #     my $buffer;
1311 #     $fh->print($buffer) while $bz->read($buffer) > 0;
1312 #     $fh->close;
1313 #
1314 #     ### set what files where extract, and where they went ###
1315 #     $self->files( [$self->_gunzip_to] );
1316 #     $self->extract_path( File::Spec->rel2abs(cwd()) );
1317 #
1318 #     return 1;
1319 # }
1320
1321 sub _bunzip2_bz2 {
1322     my $self = shift;
1323
1324     my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1325     unless( can_load( modules => $use_list ) ) {
1326         $self->_error(loc("You do not have '%1' installed - Please " .
1327                           "install it as soon as possible.",
1328                           'IO::Uncompress::Bunzip2'));
1329         return METHOD_NA;
1330     }
1331
1332     IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1333         or return $self->_error(loc("Unable to uncompress '%1': %2",
1334                             $self->archive,
1335                             $IO::Uncompress::Bunzip2::Bunzip2Error));
1336
1337     ### set what files where extract, and where they went ###
1338     $self->files( [$self->_gunzip_to] );
1339     $self->extract_path( File::Spec->rel2abs(cwd()) );
1340
1341     return 1;
1342 }
1343
1344 #################################
1345 #
1346 # UnXz code
1347 #
1348 #################################
1349
1350 sub _unxz_bin {
1351     my $self = shift;
1352
1353     ### check for /bin/unxz -- we need it ###
1354     unless( $self->bin_unxz ) {
1355         $self->_error(loc("No '%1' program found", '/bin/unxz'));
1356         return METHOD_NA;
1357     }
1358
1359     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1360         return $self->_error(loc("Could not open '%1' for writing: %2",
1361                             $self->_gunzip_to, $! ));
1362
1363     my $cmd = [ $self->bin_unxz, '-cdf', $self->archive ];
1364
1365     my $buffer;
1366     unless( scalar run( command => $cmd,
1367                         verbose => $DEBUG,
1368                         buffer  => \$buffer )
1369     ) {
1370         return $self->_error(loc("Unable to unxz '%1': %2",
1371                                     $self->archive, $buffer));
1372     }
1373
1374     ### no buffers available?
1375     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1376         $self->_error( $self->_no_buffer_content( $self->archive ) );
1377     }
1378
1379     $self->_print($fh, $buffer) if defined $buffer;
1380
1381     close $fh;
1382
1383     ### set what files where extract, and where they went ###
1384     $self->files( [$self->_gunzip_to] );
1385     $self->extract_path( File::Spec->rel2abs(cwd()) );
1386
1387     return 1;
1388 }
1389
1390 sub _unxz_cz {
1391     my $self = shift;
1392
1393     my $use_list = { 'IO::Uncompress::UnXz' => '0.0' };
1394     unless( can_load( modules => $use_list ) ) {
1395         $self->_error(loc("You do not have '%1' installed - Please " .
1396                           "install it as soon as possible.",
1397                           'IO::Uncompress::UnXz'));
1398         return METHOD_NA;
1399     }
1400
1401     IO::Uncompress::UnXz::unxz($self->archive => $self->_gunzip_to)
1402         or return $self->_error(loc("Unable to uncompress '%1': %2",
1403                             $self->archive,
1404                             $IO::Uncompress::UnXz::UnXzError));
1405
1406     ### set what files where extract, and where they went ###
1407     $self->files( [$self->_gunzip_to] );
1408     $self->extract_path( File::Spec->rel2abs(cwd()) );
1409
1410     return 1;
1411 }
1412
1413
1414 #################################
1415 #
1416 # unlzma code
1417 #
1418 #################################
1419
1420 sub _unlzma_bin {
1421     my $self = shift;
1422
1423     ### check for /bin/unlzma -- we need it ###
1424     unless( $self->bin_unlzma ) {
1425         $self->_error(loc("No '%1' program found", '/bin/unlzma'));
1426         return METHOD_NA;
1427     }
1428
1429     my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1430         return $self->_error(loc("Could not open '%1' for writing: %2",
1431                             $self->_gunzip_to, $! ));
1432
1433     my $cmd = [ $self->bin_unlzma, '-c', $self->archive ];
1434
1435     my $buffer;
1436     unless( scalar run( command => $cmd,
1437                         verbose => $DEBUG,
1438                         buffer  => \$buffer )
1439     ) {
1440         return $self->_error(loc("Unable to unlzma '%1': %2",
1441                                     $self->archive, $buffer));
1442     }
1443
1444     ### no buffers available?
1445     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1446         $self->_error( $self->_no_buffer_content( $self->archive ) );
1447     }
1448
1449     $self->_print($fh, $buffer) if defined $buffer;
1450
1451     close $fh;
1452
1453     ### set what files where extract, and where they went ###
1454     $self->files( [$self->_gunzip_to] );
1455     $self->extract_path( File::Spec->rel2abs(cwd()) );
1456
1457     return 1;
1458 }
1459
1460 sub _unlzma_cz {
1461     my $self = shift;
1462
1463     my $use_list1 = { 'IO::Uncompress::UnLzma' => '0.0' };
1464     my $use_list2 = { 'Compress::unLZMA' => '0.0' };
1465
1466     if (can_load( modules => $use_list1 ) ) {
1467         IO::Uncompress::UnLzma::unlzma($self->archive => $self->_gunzip_to)
1468             or return $self->_error(loc("Unable to uncompress '%1': %2",
1469                                 $self->archive,
1470                                 $IO::Uncompress::UnLzma::UnLzmaError));
1471     }
1472     elsif (can_load( modules => $use_list2 ) ) {
1473
1474         my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1475             return $self->_error(loc("Could not open '%1' for writing: %2",
1476                                 $self->_gunzip_to, $! ));
1477
1478         my $buffer;
1479         $buffer = Compress::unLZMA::uncompressfile( $self->archive );
1480         unless ( defined $buffer ) {
1481             return $self->_error(loc("Could not unlzma '%1': %2",
1482                                         $self->archive, $@));
1483         }
1484
1485         $self->_print($fh, $buffer) if defined $buffer;
1486
1487         close $fh;
1488     }
1489     else {
1490         $self->_error(loc("You do not have '%1' or '%2' installed - Please " .
1491                     "install it as soon as possible.", 'Compress::unLZMA', 'IO::Uncompress::UnLzma'));
1492         return METHOD_NA;
1493     }
1494
1495     ### set what files where extract, and where they went ###
1496     $self->files( [$self->_gunzip_to] );
1497     $self->extract_path( File::Spec->rel2abs(cwd()) );
1498
1499     return 1;
1500 }
1501
1502 #################################
1503 #
1504 # Error code
1505 #
1506 #################################
1507
1508 # For printing binaries that avoids interfering globals
1509 sub _print {
1510     my $self = shift;
1511     my $fh = shift;
1512
1513     local( $\, $", $, ) = ( undef, ' ', '' );
1514     return print $fh @_;
1515 }
1516
1517 sub _error {
1518     my $self    = shift;
1519     my $error   = shift;
1520     my $lerror  = Carp::longmess($error);
1521
1522     push @{$self->_error_msg},      $error;
1523     push @{$self->_error_msg_long}, $lerror;
1524
1525     ### set $Archive::Extract::WARN to 0 to disable printing
1526     ### of errors
1527     if( $WARN ) {
1528         carp $DEBUG ? $lerror : $error;
1529     }
1530
1531     return;
1532 }
1533
1534 sub error {
1535     my $self = shift;
1536
1537     ### make sure we have a fallback aref
1538     my $aref = do {
1539         shift()
1540             ? $self->_error_msg_long
1541             : $self->_error_msg
1542     } || [];
1543
1544     return join $/, @$aref;
1545 }
1546
1547 =head2 debug( MESSAGE )
1548
1549 This method outputs MESSAGE to the default filehandle if C<$DEBUG> is
1550 true. It's a small method, but it's here if you'd like to subclass it
1551 so you can so something else with any debugging output.
1552
1553 =cut
1554
1555 ### this is really a stub for subclassing
1556 sub debug {
1557     return unless $DEBUG;
1558
1559     print $_[1];
1560 }
1561
1562 sub _no_buffer_files {
1563     my $self = shift;
1564     my $file = shift or return;
1565     return loc("No buffer captured, unable to tell ".
1566                "extracted files or extraction dir for '%1'", $file);
1567 }
1568
1569 sub _no_buffer_content {
1570     my $self = shift;
1571     my $file = shift or return;
1572     return loc("No buffer captured, unable to get content for '%1'", $file);
1573 }
1574 1;
1575
1576 =pod
1577
1578 =head1 HOW IT WORKS
1579
1580 C<Archive::Extract> tries first to determine what type of archive you
1581 are passing it, by inspecting its suffix. It does not do this by using
1582 Mime magic, or something related. See C<CAVEATS> below.
1583
1584 Once it has determined the file type, it knows which extraction methods
1585 it can use on the archive. It will try a perl solution first, then fall
1586 back to a commandline tool if that fails. If that also fails, it will
1587 return false, indicating it was unable to extract the archive.
1588 See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1589
1590 =head1 CAVEATS
1591
1592 =head2 File Extensions
1593
1594 C<Archive::Extract> trusts on the extension of the archive to determine
1595 what type it is, and what extractor methods therefore can be used. If
1596 your archives do not have any of the extensions as described in the
1597 C<new()> method, you will have to specify the type explicitly, or
1598 C<Archive::Extract> will not be able to extract the archive for you.
1599
1600 =head2 Supporting Very Large Files
1601
1602 C<Archive::Extract> can use either pure perl modules or command line
1603 programs under the hood. Some of the pure perl modules (like
1604 C<Archive::Tar> and Compress::unLZMA) take the entire contents of the archive into memory,
1605 which may not be feasible on your system. Consider setting the global
1606 variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1607 the use of command line programs and won't consume so much memory.
1608
1609 See the C<GLOBAL VARIABLES> section below for details.
1610
1611 =head2 Bunzip2 support of arbitrary extensions.
1612
1613 Older versions of C</bin/bunzip2> do not support arbitrary file
1614 extensions and insist on a C<.bz2> suffix. Although we do our best
1615 to guard against this, if you experience a bunzip2 error, it may
1616 be related to this. For details, please see the C<have_old_bunzip2>
1617 method.
1618
1619 =head1 GLOBAL VARIABLES
1620
1621 =head2 $Archive::Extract::DEBUG
1622
1623 Set this variable to C<true> to have all calls to command line tools
1624 be printed out, including all their output.
1625 This also enables C<Carp::longmess> errors, instead of the regular
1626 C<carp> errors.
1627
1628 Good for tracking down why things don't work with your particular
1629 setup.
1630
1631 Defaults to C<false>.
1632
1633 =head2 $Archive::Extract::WARN
1634
1635 This variable controls whether errors encountered internally by
1636 C<Archive::Extract> should be C<carp>'d or not.
1637
1638 Set to false to silence warnings. Inspect the output of the C<error()>
1639 method manually to see what went wrong.
1640
1641 Defaults to C<true>.
1642
1643 =head2 $Archive::Extract::PREFER_BIN
1644
1645 This variables controls whether C<Archive::Extract> should prefer the
1646 use of perl modules, or commandline tools to extract archives.
1647
1648 Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1649
1650 Defaults to C<false>.
1651
1652 =head1 TODO / CAVEATS
1653
1654 =over 4
1655
1656 =item Mime magic support
1657
1658 Maybe this module should use something like C<File::Type> to determine
1659 the type, rather than blindly trust the suffix.
1660
1661 =item Thread safety
1662
1663 Currently, C<Archive::Extract> does a C<chdir> to the extraction dir before
1664 extraction, and a C<chdir> back again after. This is not necessarily
1665 thread safe. See C<rt.cpan.org> bug C<#45671> for details.
1666
1667 =back
1668
1669 =head1 BUG REPORTS
1670
1671 Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.orgE<gt>.
1672
1673 =head1 AUTHOR
1674
1675 This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1676
1677 =head1 COPYRIGHT
1678
1679 This library is free software; you may redistribute and/or modify it
1680 under the same terms as Perl itself.
1681
1682 =cut
1683
1684 # Local variables:
1685 # c-indentation-style: bsd
1686 # c-basic-offset: 4
1687 # indent-tabs-mode: nil
1688 # End:
1689 # vim: expandtab shiftwidth=4:
1690