This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Archive-Extract to CPAN version 0.54
[perl5.git] / cpan / Archive-Extract / lib / Archive / Extract.pm
CommitLineData
520c99e2
JB
1package Archive::Extract;
2
3use strict;
4
f62b3c7e 5use Cwd qw[cwd chdir];
520c99e2
JB
6use Carp qw[carp];
7use IPC::Cmd qw[run can_run];
8use FileHandle;
9use File::Path qw[mkpath];
10use File::Spec;
11use File::Basename qw[dirname basename];
12use Params::Check qw[check];
13use Module::Load::Conditional qw[can_load check_install];
14use Locale::Maketext::Simple Style => 'gettext';
15
16### solaris has silly /bin/tar output ###
17use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
9f1eb87f 18use constant ON_NETBSD => $^O eq 'netbsd' ? 1 : 0;
f37a9f48 19use constant ON_FREEBSD => $^O eq 'freebsd' ? 1 : 0;
520c99e2
JB
20use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
21
4f3b9739
JM
22### VMS may require quoting upper case command options
23use constant ON_VMS => $^O eq 'VMS' ? 1 : 0;
24
e74f3fd4
JB
25### Windows needs special treatment of Tar options
26use constant ON_WIN32 => $^O eq 'MSWin32' ? 1 : 0;
27
83285295
JB
28### we can't use this extraction method, because of missing
29### modules/binaries:
f37a9f48 30use constant METHOD_NA => [];
83285295 31
520c99e2
JB
32### If these are changed, update @TYPES and the new() POD
33use constant TGZ => 'tgz';
34use constant TAR => 'tar';
35use constant GZ => 'gz';
36use constant ZIP => 'zip';
37use constant BZ2 => 'bz2';
38use constant TBZ => 'tbz';
1dae2fb5 39use constant Z => 'Z';
8d2ac73b 40use constant LZMA => 'lzma';
d7f87992
CBW
41use constant XZ => 'xz';
42use constant TXZ => 'txz';
520c99e2 43
f37a9f48 44use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG
198e857c 45 $_ALLOW_BIN $_ALLOW_PURE_PERL $_ALLOW_TAR_ITER
83285295
JB
46 ];
47
4afdee4c 48$VERSION = '0.54';
83285295
JB
49$PREFER_BIN = 0;
50$WARN = 1;
51$DEBUG = 0;
52$_ALLOW_PURE_PERL = 1; # allow pure perl extractors
53$_ALLOW_BIN = 1; # allow binary extractors
198e857c 54$_ALLOW_TAR_ITER = 1; # try to use Archive::Tar->iter if available
520c99e2 55
83285295 56# same as all constants
f37a9f48 57my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA, XZ, TXZ );
520c99e2
JB
58
59local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
60
61=pod
62
63=head1 NAME
64
65Archive::Extract - A generic archive extracting mechanism
66
67=head1 SYNOPSIS
68
69 use Archive::Extract;
70
71 ### build an Archive::Extract object ###
72 my $ae = Archive::Extract->new( archive => 'foo.tgz' );
73
74 ### extract to cwd() ###
75 my $ok = $ae->extract;
76
77 ### extract to /tmp ###
78 my $ok = $ae->extract( to => '/tmp' );
79
80 ### what if something went wrong?
81 my $ok = $ae->extract or die $ae->error;
82
83 ### files from the archive ###
84 my $files = $ae->files;
85
86 ### dir that was extracted to ###
87 my $outdir = $ae->extract_path;
88
89
90 ### quick check methods ###
91 $ae->is_tar # is it a .tar file?
92 $ae->is_tgz # is it a .tar.gz or .tgz file?
93 $ae->is_gz; # is it a .gz file?
94 $ae->is_zip; # is it a .zip file?
95 $ae->is_bz2; # is it a .bz2 file?
96 $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
8d2ac73b 97 $ae->is_lzma; # is it a .lzma file?
d7f87992
CBW
98 $ae->is_xz; # is it a .xz file?
99 $ae->is_txz; # is it a .tar.xz or .txz file?
520c99e2
JB
100
101 ### absolute path to the archive you provided ###
102 $ae->archive;
103
104 ### commandline tools, if found ###
105 $ae->bin_tar # path to /bin/tar, if found
106 $ae->bin_gzip # path to /bin/gzip, if found
107 $ae->bin_unzip # path to /bin/unzip, if found
108 $ae->bin_bunzip2 # path to /bin/bunzip2 if found
8d2ac73b 109 $ae->bin_unlzma # path to /bin/unlzma if found
d7f87992 110 $ae->bin_unxz # path to /bin/unxz if found
520c99e2
JB
111
112=head1 DESCRIPTION
113
114Archive::Extract is a generic archive extraction mechanism.
115
116It allows you to extract any archive file of the type .tar, .tar.gz,
f37a9f48
CBW
117.gz, .Z, tar.bz2, .tbz, .bz2, .zip, .xz,, .txz, .tar.xz or .lzma
118without having to worry how it
119does so, or use different interfaces for each type by using either
1dae2fb5 120perl modules, or commandline tools on your system.
520c99e2
JB
121
122See the C<HOW IT WORKS> section further down for details.
123
124=cut
125
126
127### see what /bin/programs are available ###
128$PROGRAMS = {};
d7f87992 129for my $pgm (qw[tar unzip gzip bunzip2 uncompress unlzma unxz]) {
f37a9f48 130 if ( $pgm eq 'unzip' and ( ON_NETBSD or ON_FREEBSD ) ) {
9f1eb87f
CBW
131 local $IPC::Cmd::INSTANCES = 1;
132 my @possibles = can_run($pgm);
f37a9f48 133 ($PROGRAMS->{$pgm}) = grep { ON_NETBSD ? m!/usr/pkg/! : m!/usr/local! } can_run($pgm);
9f1eb87f
CBW
134 next;
135 }
520c99e2
JB
136 $PROGRAMS->{$pgm} = can_run($pgm);
137}
138
139### mapping from types to extractor methods ###
83285295
JB
140my $Mapping = { # binary program # pure perl module
141 is_tgz => { bin => '_untar_bin', pp => '_untar_at' },
142 is_tar => { bin => '_untar_bin', pp => '_untar_at' },
143 is_gz => { bin => '_gunzip_bin', pp => '_gunzip_cz' },
144 is_zip => { bin => '_unzip_bin', pp => '_unzip_az' },
145 is_tbz => { bin => '_untar_bin', pp => '_untar_at' },
146 is_bz2 => { bin => '_bunzip2_bin', pp => '_bunzip2_bz2'},
147 is_Z => { bin => '_uncompress_bin', pp => '_gunzip_cz' },
148 is_lzma => { bin => '_unlzma_bin', pp => '_unlzma_cz' },
d7f87992
CBW
149 is_xz => { bin => '_unxz_bin', pp => '_unxz_cz' },
150 is_txz => { bin => '_untar_bin', pp => '_untar_at' },
520c99e2
JB
151};
152
eadbb00b 153{ ### use subs so we re-generate array refs etc for the no-override flags
83285295
JB
154 ### if we don't, then we reuse the same arrayref, meaning objects store
155 ### previous errors
520c99e2 156 my $tmpl = {
83285295
JB
157 archive => sub { { required => 1, allow => FILE_EXISTS } },
158 type => sub { { default => '', allow => [ @Types ] } },
159 _error_msg => sub { { no_override => 1, default => [] } },
160 _error_msg_long => sub { { no_override => 1, default => [] } },
520c99e2
JB
161 };
162
eadbb00b 163 ### build accessors ###
f37a9f48 164 for my $method( keys %$tmpl,
520c99e2 165 qw[_extractor _gunzip_to files extract_path],
520c99e2
JB
166 ) {
167 no strict 'refs';
168 *$method = sub {
169 my $self = shift;
170 $self->{$method} = $_[0] if @_;
171 return $self->{$method};
172 }
173 }
174
175=head1 METHODS
176
177=head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
178
179Creates a new C<Archive::Extract> object based on the archive file you
180passed it. Automatically determines the type of archive based on the
181extension, but you can override that by explicitly providing the
182C<type> argument.
183
184Valid values for C<type> are:
185
186=over 4
187
188=item tar
189
190Standard tar files, as produced by, for example, C</bin/tar>.
191Corresponds to a C<.tar> suffix.
192
193=item tgz
194
195Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
196Corresponds to a C<.tgz> or C<.tar.gz> suffix.
197
198=item gz
199
200Gzip compressed file, as produced by, for example C</bin/gzip>.
201Corresponds to a C<.gz> suffix.
202
1dae2fb5
RGS
203=item Z
204
205Lempel-Ziv compressed file, as produced by, for example C</bin/compress>.
206Corresponds to a C<.Z> suffix.
207
520c99e2
JB
208=item zip
209
210Zip compressed file, as produced by, for example C</bin/zip>.
211Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
212
213=item bz2
214
215Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
216Corresponds to a C<.bz2> suffix.
217
218=item tbz
219
9f1eb87f 220Bzip2 compressed tar file, as produced by, for example C</bin/tar -j>.
520c99e2
JB
221Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
222
8d2ac73b
SP
223=item lzma
224
225Lzma compressed file, as produced by C</bin/lzma>.
226Corresponds to a C<.lzma> suffix.
227
d7f87992
CBW
228=item xz
229
230Xz compressed file, as produced by C</bin/xz>.
231Corresponds to a C<.xz> suffix.
232
233=item txz
234
9f1eb87f 235Xz compressed tar file, as produced by, for example C</bin/tar -J>.
d7f87992
CBW
236Corresponds to a C<.txz> or C<.tar.xz> suffix.
237
520c99e2
JB
238=back
239
240Returns a C<Archive::Extract> object on success, or false on failure.
241
242=cut
243
244 ### constructor ###
245 sub new {
246 my $class = shift;
247 my %hash = @_;
f37a9f48 248
83285295
JB
249 ### see above why we use subs here and generate the template;
250 ### it's basically to not re-use arrayrefs
251 my %utmpl = map { $_ => $tmpl->{$_}->() } keys %$tmpl;
520c99e2 252
83285295 253 my $parsed = check( \%utmpl, \%hash ) or return;
520c99e2
JB
254
255 ### make sure we have an absolute path ###
256 my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
257
258 ### figure out the type, if it wasn't already specified ###
259 unless ( $parsed->{type} ) {
260 $parsed->{type} =
574b415d 261 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ :
520c99e2
JB
262 $ar =~ /.+?\.gz$/i ? GZ :
263 $ar =~ /.+?\.tar$/i ? TAR :
264 $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP :
9e5a0ef9 265 $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ :
520c99e2 266 $ar =~ /.+?\.bz2$/i ? BZ2 :
1dae2fb5 267 $ar =~ /.+?\.Z$/ ? Z :
8d2ac73b 268 $ar =~ /.+?\.lzma$/ ? LZMA :
d7f87992
CBW
269 $ar =~ /.+?\.(?:txz|tar\.xz)$/i ? TXZ :
270 $ar =~ /.+?\.xz$/ ? XZ :
520c99e2
JB
271 '';
272
273 }
274
83285295 275 bless $parsed, $class;
520c99e2 276
f37a9f48 277 ### don't know what type of file it is
83285295
JB
278 ### XXX this *has* to be an object call, not a package call
279 return $parsed->_error(loc("Cannot determine file type for '%1'",
280 $parsed->{archive} )) unless $parsed->{type};
281 return $parsed;
520c99e2
JB
282 }
283}
284
285=head2 $ae->extract( [to => '/output/path'] )
286
287Extracts the archive represented by the C<Archive::Extract> object to
288the path of your choice as specified by the C<to> argument. Defaults to
289C<cwd()>.
290
f37a9f48
CBW
291Since C<.gz> files never hold a directory, but only a single file; if
292the C<to> argument is an existing directory, the file is extracted
293there, with its C<.gz> suffix stripped.
294If the C<to> argument is not an existing directory, the C<to> argument
295is understood to be a filename, if the archive type is C<gz>.
520c99e2 296In the case that you did not specify a C<to> argument, the output
83285295 297file will be the name of the archive file, stripped from its C<.gz>
520c99e2
JB
298suffix, in the current working directory.
299
300C<extract> will try a pure perl solution first, and then fall back to
301commandline tools if they are available. See the C<GLOBAL VARIABLES>
302section below on how to alter this behaviour.
303
304It will return true on success, and false on failure.
305
306On success, it will also set the follow attributes in the object:
307
308=over 4
309
310=item $ae->extract_path
311
312This is the directory that the files where extracted to.
313
314=item $ae->files
315
316This is an array ref with the paths of all the files in the archive,
317relative to the C<to> argument you specified.
318To get the full path to an extracted file, you would use:
319
320 File::Spec->catfile( $to, $ae->files->[0] );
321
322Note that all files from a tar archive will be in unix format, as per
323the tar specification.
324
325=back
326
327=cut
328
329sub extract {
330 my $self = shift;
331 my %hash = @_;
332
83285295
JB
333 ### reset error messages
334 $self->_error_msg( [] );
335 $self->_error_msg_long( [] );
336
520c99e2
JB
337 my $to;
338 my $tmpl = {
339 to => { default => '.', store => \$to }
340 };
341
342 check( $tmpl, \%hash ) or return;
343
f37a9f48 344 ### so 'to' could be a file or a dir, depending on whether it's a .gz
520c99e2
JB
345 ### file, or basically anything else.
346 ### so, check that, then act accordingly.
347 ### set an accessor specifically so _gunzip can know what file to extract
348 ### to.
349 my $dir;
350 { ### a foo.gz file
d7f87992 351 if( $self->is_gz or $self->is_bz2 or $self->is_Z or $self->is_lzma or $self->is_xz ) {
f37a9f48 352
d7f87992 353 my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z|lzma|xz)$//i;
f37a9f48 354
520c99e2
JB
355 ### to is a dir?
356 if ( -d $to ) {
f37a9f48 357 $dir = $to;
520c99e2
JB
358 $self->_gunzip_to( basename($cp) );
359
360 ### then it's a filename
361 } else {
362 $dir = dirname($to);
363 $self->_gunzip_to( basename($to) );
364 }
365
366 ### not a foo.gz file
367 } else {
368 $dir = $to;
369 }
370 }
371
372 ### make the dir if it doesn't exist ###
373 unless( -d $dir ) {
374 eval { mkpath( $dir ) };
375
376 return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
377 if $@;
378 }
379
380 ### get the current dir, to restore later ###
381 my $cwd = cwd();
382
383 my $ok = 1;
384 EXTRACT: {
385
386 ### chdir to the target dir ###
387 unless( chdir $dir ) {
388 $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
389 $ok = 0; last EXTRACT;
390 }
391
392 ### set files to an empty array ref, so there's always an array
393 ### ref IN the accessor, to avoid errors like:
394 ### Can't use an undefined value as an ARRAY reference at
395 ### ../lib/Archive/Extract.pm line 742. (rt #19815)
396 $self->files( [] );
397
f37a9f48 398 ### find out the dispatch methods needed for this type of
83285295
JB
399 ### archive. Do a $self->is_XXX to figure out the type, then
400 ### get the hashref with bin + pure perl dispatchers.
401 my ($map) = map { $Mapping->{$_} } grep { $self->$_ } keys %$Mapping;
520c99e2 402
83285295
JB
403 ### add pure perl extractor if allowed & add bin extractor if allowed
404 my @methods;
405 push @methods, $map->{'pp'} if $_ALLOW_PURE_PERL;
406 push @methods, $map->{'bin'} if $_ALLOW_BIN;
f37a9f48 407
83285295
JB
408 ### reverse it if we prefer bin extractors
409 @methods = reverse @methods if $PREFER_BIN;
520c99e2 410
83285295
JB
411 my($na, $fail);
412 for my $method (@methods) {
a6696b92 413 $self->debug( "# Extracting with ->$method\n" );
f37a9f48 414
83285295 415 my $rv = $self->$method;
f37a9f48 416
83285295
JB
417 ### a positive extraction
418 if( $rv and $rv ne METHOD_NA ) {
a6696b92 419 $self->debug( "# Extraction succeeded\n" );
83285295
JB
420 $self->_extractor($method);
421 last;
f37a9f48 422
83285295 423 ### method is not available
f37a9f48 424 } elsif ( $rv and $rv eq METHOD_NA ) {
a6696b92 425 $self->debug( "# Extraction method not available\n" );
f37a9f48 426 $na++;
83285295 427 } else {
a6696b92 428 $self->debug( "# Extraction method failed\n" );
83285295 429 $fail++;
f37a9f48 430 }
83285295 431 }
520c99e2 432
83285295
JB
433 ### warn something went wrong if we didn't get an extractor
434 unless( $self->_extractor ) {
435 my $diag = $fail ? loc("Extract failed due to errors") :
436 $na ? loc("Extract failed; no extractors available") :
437 '';
f37a9f48 438
83285295
JB
439 $self->_error($diag);
440 $ok = 0;
f37a9f48 441 }
520c99e2
JB
442 }
443
444 ### and chdir back ###
445 unless( chdir $cwd ) {
446 $self->_error(loc("Could not chdir back to start dir '%1': %2'",
447 $cwd, $!));
448 }
449
450 return $ok;
451}
452
453=pod
454
455=head1 ACCESSORS
456
457=head2 $ae->error([BOOL])
458
459Returns the last encountered error as string.
460Pass it a true value to get the C<Carp::longmess()> output instead.
461
462=head2 $ae->extract_path
463
464This is the directory the archive got extracted to.
465See C<extract()> for details.
466
467=head2 $ae->files
468
469This is an array ref holding all the paths from the archive.
470See C<extract()> for details.
471
472=head2 $ae->archive
473
474This is the full path to the archive file represented by this
475C<Archive::Extract> object.
476
477=head2 $ae->type
478
479This is the type of archive represented by this C<Archive::Extract>
480object. See accessors below for an easier way to use this.
481See the C<new()> method for details.
482
483=head2 $ae->types
484
485Returns a list of all known C<types> for C<Archive::Extract>'s
486C<new> method.
487
488=cut
489
490sub types { return @Types }
491
492=head2 $ae->is_tgz
493
494Returns true if the file is of type C<.tar.gz>.
495See the C<new()> method for details.
496
497=head2 $ae->is_tar
498
499Returns true if the file is of type C<.tar>.
500See the C<new()> method for details.
501
502=head2 $ae->is_gz
503
504Returns true if the file is of type C<.gz>.
505See the C<new()> method for details.
506
1dae2fb5
RGS
507=head2 $ae->is_Z
508
509Returns true if the file is of type C<.Z>.
510See the C<new()> method for details.
511
520c99e2
JB
512=head2 $ae->is_zip
513
514Returns true if the file is of type C<.zip>.
515See the C<new()> method for details.
516
8d2ac73b
SP
517=head2 $ae->is_lzma
518
519Returns true if the file is of type C<.lzma>.
520See the C<new()> method for details.
521
d7f87992
CBW
522=head2 $ae->is_xz
523
524Returns true if the file is of type C<.xz>.
525See the C<new()> method for details.
526
520c99e2
JB
527=cut
528
529### quick check methods ###
530sub is_tgz { return $_[0]->type eq TGZ }
531sub is_tar { return $_[0]->type eq TAR }
532sub is_gz { return $_[0]->type eq GZ }
533sub is_zip { return $_[0]->type eq ZIP }
534sub is_tbz { return $_[0]->type eq TBZ }
535sub is_bz2 { return $_[0]->type eq BZ2 }
1dae2fb5 536sub is_Z { return $_[0]->type eq Z }
8d2ac73b 537sub is_lzma { return $_[0]->type eq LZMA }
d7f87992
CBW
538sub is_xz { return $_[0]->type eq XZ }
539sub is_txz { return $_[0]->type eq TXZ }
520c99e2
JB
540
541=pod
542
543=head2 $ae->bin_tar
544
545Returns the full path to your tar binary, if found.
546
547=head2 $ae->bin_gzip
548
549Returns the full path to your gzip binary, if found
550
551=head2 $ae->bin_unzip
552
553Returns the full path to your unzip binary, if found
554
8d2ac73b
SP
555=head2 $ae->bin_unlzma
556
557Returns the full path to your unlzma binary, if found
558
d7f87992
CBW
559=head2 $ae->bin_unxz
560
561Returns the full path to your unxz binary, if found
562
520c99e2
JB
563=cut
564
565### paths to commandline tools ###
1dae2fb5
RGS
566sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} }
567sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} }
568sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} }
569sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
f37a9f48 570sub bin_uncompress { return $PROGRAMS->{'uncompress'}
1dae2fb5 571 if $PROGRAMS->{'uncompress'} }
8d2ac73b 572sub bin_unlzma { return $PROGRAMS->{'unlzma'} if $PROGRAMS->{'unlzma'} }
d7f87992 573sub bin_unxz { return $PROGRAMS->{'unxz'} if $PROGRAMS->{'unxz'} }
8d2ac73b 574
9e5a0ef9
JB
575=head2 $bool = $ae->have_old_bunzip2
576
577Older versions of C</bin/bunzip2>, from before the C<bunzip2 1.0> release,
578require all archive names to end in C<.bz2> or it will not extract
579them. This method checks if you have a recent version of C<bunzip2>
580that allows any extension, or an older one that doesn't.
581
582=cut
583
584sub have_old_bunzip2 {
585 my $self = shift;
586
587 ### no bunzip2? no old bunzip2 either :)
588 return unless $self->bin_bunzip2;
589
f37a9f48 590 ### if we can't run this, we can't be sure if it's too old or not
9e5a0ef9
JB
591 ### XXX stupid stupid stupid bunzip2 doesn't understand --version
592 ### is not a request to extract data:
593 ### $ bunzip2 --version
594 ### bzip2, a block-sorting file compressor. Version 1.0.2, 30-Dec-2001.
595 ### [...]
596 ### bunzip2: I won't read compressed data from a terminal.
597 ### bunzip2: For help, type: `bunzip2 --help'.
598 ### $ echo $?
599 ### 1
600 ### HATEFUL!
f37a9f48 601
8d2ac73b
SP
602 ### double hateful: bunzip2 --version also hangs if input is a pipe
603 ### See #32370: Archive::Extract will hang if stdin is a pipe [+PATCH]
604 ### So, we have to provide *another* argument which is a fake filename,
83285295 605 ### just so it wont try to read from stdin to print its version..
8d2ac73b
SP
606 ### *sigh*
607 ### Even if the file exists, it won't clobber or change it.
9e5a0ef9 608 my $buffer;
f37a9f48 609 scalar run(
8d2ac73b 610 command => [$self->bin_bunzip2, '--version', 'NoSuchFile'],
9e5a0ef9
JB
611 verbose => 0,
612 buffer => \$buffer
613 );
614
615 ### no output
616 return unless $buffer;
f37a9f48 617
9e5a0ef9
JB
618 my ($version) = $buffer =~ /version \s+ (\d+)/ix;
619
620 return 1 if $version < 1;
621 return;
622}
520c99e2
JB
623
624#################################
625#
626# Untar code
627#
628#################################
629
e74f3fd4
JB
630### annoying issue with (gnu) tar on win32, as illustrated by this
631### bug: https://rt.cpan.org/Ticket/Display.html?id=40138
632### which shows that (gnu) tar will interpret a file name with a :
633### in it as a remote file name, so C:\tmp\foo.txt is interpreted
634### as a remote shell, and the extract fails.
635{ my @ExtraTarFlags;
636 if( ON_WIN32 and my $cmd = __PACKAGE__->bin_tar ) {
520c99e2 637
e74f3fd4
JB
638 ### if this is gnu tar we are running, we need to use --force-local
639 push @ExtraTarFlags, '--force-local' if `$cmd --version` =~ /gnu tar/i;
f37a9f48 640 }
520c99e2 641
520c99e2 642
e74f3fd4
JB
643 ### use /bin/tar to extract ###
644 sub _untar_bin {
645 my $self = shift;
f37a9f48 646
e74f3fd4
JB
647 ### check for /bin/tar ###
648 ### check for /bin/gzip if we need it ###
649 ### if any of the binaries are not available, return NA
f37a9f48 650 { my $diag = not $self->bin_tar ?
e74f3fd4 651 loc("No '%1' program found", '/bin/tar') :
f37a9f48 652 $self->is_tgz && !$self->bin_gzip ?
e74f3fd4
JB
653 loc("No '%1' program found", '/bin/gzip') :
654 $self->is_tbz && !$self->bin_bunzip2 ?
655 loc("No '%1' program found", '/bin/bunzip2') :
d7f87992
CBW
656 $self->is_txz && !$self->bin_unxz ?
657 loc("No '%1' program found", '/bin/unxz') :
e74f3fd4 658 '';
f37a9f48 659
e74f3fd4
JB
660 if( $diag ) {
661 $self->_error( $diag );
662 return METHOD_NA;
663 }
f37a9f48
CBW
664 }
665
e74f3fd4
JB
666 ### XXX figure out how to make IPC::Run do this in one call --
667 ### currently i don't know how to get output of a command after a pipe
668 ### trapped in a scalar. Mailed barries about this 5th of june 2004.
f37a9f48 669
e74f3fd4
JB
670 ### see what command we should run, based on whether
671 ### it's a .tgz or .tar
f37a9f48 672
e74f3fd4
JB
673 ### XXX solaris tar and bsdtar are having different outputs
674 ### depending whether you run with -x or -t
675 ### compensate for this insanity by running -t first, then -x
f37a9f48
CBW
676 { my $cmd =
677 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
e74f3fd4 678 $self->bin_tar, '-tf', '-'] :
f37a9f48 679 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
e74f3fd4 680 $self->bin_tar, '-tf', '-'] :
f37a9f48 681 $self->is_txz ? [$self->bin_unxz, '-cd', $self->archive, '|',
d7f87992 682 $self->bin_tar, '-tf', '-'] :
e74f3fd4 683 [$self->bin_tar, @ExtraTarFlags, '-tf', $self->archive];
f37a9f48
CBW
684
685 ### run the command
7bb74d35 686 ### newer versions of 'tar' (1.21 and up) now print record size
f37a9f48 687 ### to STDERR as well if v OR t is given (used to be both). This
7bb74d35
JB
688 ### is a 'feature' according to the changelog, so we must now only
689 ### inspect STDOUT, otherwise, failures like these occur:
f37a9f48 690 ### http://www.cpantesters.org/cpan/report/3230366
7bb74d35
JB
691 my $buffer = '';
692 my @out = run( command => $cmd,
e74f3fd4 693 buffer => \$buffer,
7bb74d35
JB
694 verbose => $DEBUG );
695
f37a9f48
CBW
696 ### command was unsuccessful
697 unless( $out[0] ) {
e74f3fd4
JB
698 return $self->_error(loc(
699 "Error listing contents of archive '%1': %2",
700 $self->archive, $buffer ));
701 }
f37a9f48 702
e74f3fd4
JB
703 ### no buffers available?
704 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
705 $self->_error( $self->_no_buffer_files( $self->archive ) );
f37a9f48 706
e74f3fd4
JB
707 } else {
708 ### if we're on solaris we /might/ be using /bin/tar, which has
709 ### a weird output format... we might also be using
710 ### /usr/local/bin/tar, which is gnu tar, which is perfectly
711 ### fine... so we have to do some guessing here =/
712 my @files = map { chomp;
713 !ON_SOLARIS ? $_
714 : (m|^ x \s+ # 'xtract' -- sigh
715 (.+?), # the actual file name
716 \s+ [\d,.]+ \s bytes,
717 \s+ [\d,.]+ \s tape \s blocks
718 |x ? $1 : $_);
f37a9f48 719
eadbb00b 720 ### only STDOUT, see above. Sometimes, extra whitespace
0228b1bb
JB
721 ### is present, so make sure we only pick lines with
722 ### a length
f37a9f48
CBW
723 } grep { length } map { split $/, $_ } join '', @{$out[3]};
724
e74f3fd4
JB
725 ### store the files that are in the archive ###
726 $self->files(\@files);
727 }
520c99e2 728 }
f37a9f48 729
e74f3fd4 730 ### now actually extract it ###
f37a9f48 731 { my $cmd =
e74f3fd4
JB
732 $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
733 $self->bin_tar, '-xf', '-'] :
f37a9f48 734 $self->is_tbz ? [$self->bin_bunzip2, '-cd', $self->archive, '|',
e74f3fd4 735 $self->bin_tar, '-xf', '-'] :
f37a9f48 736 $self->is_txz ? [$self->bin_unxz, '-cd', $self->archive, '|',
d7f87992 737 $self->bin_tar, '-xf', '-'] :
e74f3fd4 738 [$self->bin_tar, @ExtraTarFlags, '-xf', $self->archive];
f37a9f48 739
e74f3fd4
JB
740 my $buffer = '';
741 unless( scalar run( command => $cmd,
742 buffer => \$buffer,
743 verbose => $DEBUG )
744 ) {
745 return $self->_error(loc("Error extracting archive '%1': %2",
746 $self->archive, $buffer ));
747 }
f37a9f48 748
e74f3fd4
JB
749 ### we might not have them, due to lack of buffers
750 if( $self->files ) {
751 ### now that we've extracted, figure out where we extracted to
752 my $dir = $self->__get_extract_dir( $self->files );
f37a9f48 753
e74f3fd4
JB
754 ### store the extraction dir ###
755 $self->extract_path( $dir );
756 }
520c99e2 757 }
f37a9f48 758
e74f3fd4
JB
759 ### we got here, no error happened
760 return 1;
520c99e2 761 }
520c99e2
JB
762}
763
e74f3fd4 764
520c99e2
JB
765### use archive::tar to extract ###
766sub _untar_at {
767 my $self = shift;
768
83285295
JB
769 ### Loading Archive::Tar is going to set it to 1, so make it local
770 ### within this block, starting with its initial value. Whatever
771 ### Achive::Tar does will be undone when we return.
772 ###
773 ### Also, later, set $Archive::Tar::WARN to $Archive::Extract::WARN
774 ### so users don't have to even think about this variable. If they
775 ### do, they still get their set value outside of this call.
776 local $Archive::Tar::WARN = $Archive::Tar::WARN;
f37a9f48 777
83285295 778 ### we definitely need Archive::Tar, so load that first
520c99e2
JB
779 { my $use_list = { 'Archive::Tar' => '0.0' };
780
781 unless( can_load( modules => $use_list ) ) {
782
83285295
JB
783 $self->_error(loc("You do not have '%1' installed - " .
784 "Please install it as soon as possible.",
785 'Archive::Tar'));
f37a9f48 786
83285295 787 return METHOD_NA;
520c99e2
JB
788 }
789 }
790
791 ### we might pass it a filehandle if it's a .tbz file..
792 my $fh_to_read = $self->archive;
793
794 ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
795 ### if A::T's version is 0.99 or higher
796 if( $self->is_tgz ) {
797 my $use_list = { 'Compress::Zlib' => '0.0' };
798 $use_list->{ 'IO::Zlib' } = '0.0'
799 if $Archive::Tar::VERSION >= '0.99';
800
801 unless( can_load( modules => $use_list ) ) {
802 my $which = join '/', sort keys %$use_list;
803
83285295
JB
804 $self->_error(loc(
805 "You do not have '%1' installed - Please ".
806 "install it as soon as possible.", $which)
807 );
f37a9f48 808
83285295 809 return METHOD_NA;
520c99e2 810 }
83285295 811
520c99e2
JB
812 } elsif ( $self->is_tbz ) {
813 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
814 unless( can_load( modules => $use_list ) ) {
83285295
JB
815 $self->_error(loc(
816 "You do not have '%1' installed - Please " .
f37a9f48 817 "install it as soon as possible.",
83285295
JB
818 'IO::Uncompress::Bunzip2')
819 );
f37a9f48 820
83285295 821 return METHOD_NA;
520c99e2
JB
822 }
823
824 my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
825 return $self->_error(loc("Unable to open '%1': %2",
826 $self->archive,
827 $IO::Uncompress::Bunzip2::Bunzip2Error));
828
829 $fh_to_read = $bz;
d7f87992
CBW
830 } elsif ( $self->is_txz ) {
831 my $use_list = { 'IO::Uncompress::UnXz' => '0.0' };
832 unless( can_load( modules => $use_list ) ) {
833 $self->_error(loc(
834 "You do not have '%1' installed - Please " .
f37a9f48 835 "install it as soon as possible.",
d7f87992
CBW
836 'IO::Uncompress::UnXz')
837 );
f37a9f48 838
d7f87992
CBW
839 return METHOD_NA;
840 }
841
842 my $xz = IO::Uncompress::UnXz->new( $self->archive ) or
843 return $self->_error(loc("Unable to open '%1': %2",
844 $self->archive,
845 $IO::Uncompress::UnXz::UnXzError));
846
847 $fh_to_read = $xz;
520c99e2
JB
848 }
849
198e857c
JB
850 my @files;
851 {
852 ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've
853 ### localized $Archive::Tar::WARN already.
854 $Archive::Tar::WARN = $Archive::Extract::WARN;
83285295 855
198e857c
JB
856 ### only tell it it's compressed if it's a .tgz, as we give it a file
857 ### handle if it's a .tbz
858 my @read = ( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) );
520c99e2 859
198e857c
JB
860 ### for version of Archive::Tar > 1.04
861 local $Archive::Tar::CHOWN = 0;
520c99e2 862
198e857c
JB
863 ### use the iterator if we can. it's a feature of A::T 1.40 and up
864 if ( $_ALLOW_TAR_ITER && Archive::Tar->can( 'iter' ) ) {
520c99e2 865
198e857c
JB
866 my $next;
867 unless ( $next = Archive::Tar->iter( @read ) ) {
868 return $self->_error(loc(
f37a9f48 869 "Unable to read '%1': %2", $self->archive,
198e857c
JB
870 $Archive::Tar::error));
871 }
520c99e2 872
198e857c
JB
873 while ( my $file = $next->() ) {
874 push @files, $file->full_path;
f37a9f48 875
198e857c 876 $file->extract or return $self->_error(loc(
f37a9f48 877 "Unable to read '%1': %2",
198e857c
JB
878 $self->archive,
879 $Archive::Tar::error));
880 }
f37a9f48
CBW
881
882 ### older version, read the archive into memory
198e857c
JB
883 } else {
884
885 my $tar = Archive::Tar->new();
886
887 unless( $tar->read( @read ) ) {
f37a9f48 888 return $self->_error(loc("Unable to read '%1': %2",
198e857c
JB
889 $self->archive, $Archive::Tar::error));
890 }
891
892 ### workaround to prevent Archive::Tar from setting uid, which
893 ### is a potential security hole. -autrijus
894 ### have to do it here, since A::T needs to be /loaded/ first ###
895 { no strict 'refs'; local $^W;
896
897 ### older versions of archive::tar <= 0.23
898 *Archive::Tar::chown = sub {};
899 }
900
901 { local $^W; # quell 'splice() offset past end of array' warnings
902 # on older versions of A::T
903
f37a9f48 904 ### older archive::tar always returns $self, return value
198e857c
JB
905 ### slightly fux0r3d because of it.
906 $tar->extract or return $self->_error(loc(
907 "Unable to extract '%1': %2",
908 $self->archive, $Archive::Tar::error ));
909 }
910
911 @files = $tar->list_files;
912 }
520c99e2
JB
913 }
914
198e857c 915 my $dir = $self->__get_extract_dir( \@files );
520c99e2
JB
916
917 ### store the files that are in the archive ###
918 $self->files(\@files);
919
920 ### store the extraction dir ###
921 $self->extract_path( $dir );
922
923 ### check if the dir actually appeared ###
924 return 1 if -d $self->extract_path;
925
926 ### no dir, we failed ###
927 return $self->_error(loc("Unable to extract '%1': %2",
928 $self->archive, $Archive::Tar::error ));
929}
930
931#################################
932#
933# Gunzip code
934#
935#################################
936
520c99e2
JB
937sub _gunzip_bin {
938 my $self = shift;
939
940 ### check for /bin/gzip -- we need it ###
83285295
JB
941 unless( $self->bin_gzip ) {
942 $self->_error(loc("No '%1' program found", '/bin/gzip'));
943 return METHOD_NA;
944 }
520c99e2
JB
945
946 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
947 return $self->_error(loc("Could not open '%1' for writing: %2",
948 $self->_gunzip_to, $! ));
949
950 my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
951
952 my $buffer;
953 unless( scalar run( command => $cmd,
954 verbose => $DEBUG,
955 buffer => \$buffer )
956 ) {
957 return $self->_error(loc("Unable to gunzip '%1': %2",
958 $self->archive, $buffer));
959 }
960
961 ### no buffers available?
962 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
963 $self->_error( $self->_no_buffer_content( $self->archive ) );
964 }
965
35fe4187 966 $self->_print($fh, $buffer) if defined $buffer;
520c99e2
JB
967
968 close $fh;
969
970 ### set what files where extract, and where they went ###
971 $self->files( [$self->_gunzip_to] );
972 $self->extract_path( File::Spec->rel2abs(cwd()) );
973
974 return 1;
975}
976
977sub _gunzip_cz {
978 my $self = shift;
979
980 my $use_list = { 'Compress::Zlib' => '0.0' };
981 unless( can_load( modules => $use_list ) ) {
83285295
JB
982 $self->_error(loc("You do not have '%1' installed - Please " .
983 "install it as soon as possible.", 'Compress::Zlib'));
984 return METHOD_NA;
520c99e2
JB
985 }
986
987 my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
988 return $self->_error(loc("Unable to open '%1': %2",
989 $self->archive, $Compress::Zlib::gzerrno));
990
991 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
992 return $self->_error(loc("Could not open '%1' for writing: %2",
993 $self->_gunzip_to, $! ));
994
995 my $buffer;
35fe4187 996 $self->_print($fh, $buffer) while $gz->gzread($buffer) > 0;
520c99e2
JB
997 $fh->close;
998
999 ### set what files where extract, and where they went ###
1000 $self->files( [$self->_gunzip_to] );
1001 $self->extract_path( File::Spec->rel2abs(cwd()) );
1002
1003 return 1;
1004}
1005
1006#################################
1007#
1dae2fb5
RGS
1008# Uncompress code
1009#
1010#################################
1011
1dae2fb5
RGS
1012sub _uncompress_bin {
1013 my $self = shift;
1014
1015 ### check for /bin/gzip -- we need it ###
83285295
JB
1016 unless( $self->bin_uncompress ) {
1017 $self->_error(loc("No '%1' program found", '/bin/uncompress'));
1018 return METHOD_NA;
1019 }
1dae2fb5
RGS
1020
1021 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1022 return $self->_error(loc("Could not open '%1' for writing: %2",
1023 $self->_gunzip_to, $! ));
1024
1025 my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
1026
1027 my $buffer;
1028 unless( scalar run( command => $cmd,
1029 verbose => $DEBUG,
1030 buffer => \$buffer )
1031 ) {
1032 return $self->_error(loc("Unable to uncompress '%1': %2",
1033 $self->archive, $buffer));
1034 }
1035
1036 ### no buffers available?
1037 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1038 $self->_error( $self->_no_buffer_content( $self->archive ) );
1039 }
1040
35fe4187 1041 $self->_print($fh, $buffer) if defined $buffer;
1dae2fb5
RGS
1042
1043 close $fh;
1044
1045 ### set what files where extract, and where they went ###
1046 $self->files( [$self->_gunzip_to] );
1047 $self->extract_path( File::Spec->rel2abs(cwd()) );
1048
1049 return 1;
1050}
1051
1052
1053#################################
1054#
520c99e2
JB
1055# Unzip code
1056#
1057#################################
1058
520c99e2
JB
1059
1060sub _unzip_bin {
1061 my $self = shift;
1062
1063 ### check for /bin/gzip if we need it ###
83285295
JB
1064 unless( $self->bin_unzip ) {
1065 $self->_error(loc("No '%1' program found", '/bin/unzip'));
1066 return METHOD_NA;
f37a9f48 1067 }
520c99e2
JB
1068
1069 ### first, get the files.. it must be 2 different commands with 'unzip' :(
9e5a0ef9 1070 { ### on VMS, capital letter options have to be quoted. This is
6447e912 1071 ### reported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
9e5a0ef9
JB
1072 ### Subject: [patch@31735]Archive Extract fix on VMS.
1073 my $opt = ON_VMS ? '"-Z"' : '-Z';
1074 my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
f37a9f48 1075
520c99e2
JB
1076 my $buffer = '';
1077 unless( scalar run( command => $cmd,
1078 verbose => $DEBUG,
1079 buffer => \$buffer )
1080 ) {
1081 return $self->_error(loc("Unable to unzip '%1': %2",
1082 $self->archive, $buffer));
1083 }
1084
1085 ### no buffers available?
1086 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1087 $self->_error( $self->_no_buffer_files( $self->archive ) );
1088
1089 } else {
4afdee4c 1090 local $/ = ON_WIN32 ? "\r\n" : "\n";
520c99e2
JB
1091 $self->files( [split $/, $buffer] );
1092 }
1093 }
1094
1095 ### now, extract the archive ###
1096 { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
1097
1098 my $buffer;
1099 unless( scalar run( command => $cmd,
1100 verbose => $DEBUG,
1101 buffer => \$buffer )
1102 ) {
1103 return $self->_error(loc("Unable to unzip '%1': %2",
1104 $self->archive, $buffer));
1105 }
1106
1107 if( scalar @{$self->files} ) {
1108 my $files = $self->files;
1109 my $dir = $self->__get_extract_dir( $files );
1110
1111 $self->extract_path( $dir );
1112 }
1113 }
1114
1115 return 1;
1116}
1117
1118sub _unzip_az {
1119 my $self = shift;
1120
1121 my $use_list = { 'Archive::Zip' => '0.0' };
1122 unless( can_load( modules => $use_list ) ) {
83285295
JB
1123 $self->_error(loc("You do not have '%1' installed - Please " .
1124 "install it as soon as possible.", 'Archive::Zip'));
f37a9f48 1125 return METHOD_NA;
520c99e2
JB
1126 }
1127
1128 my $zip = Archive::Zip->new();
1129
1130 unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
1131 return $self->_error(loc("Unable to read '%1'", $self->archive));
1132 }
1133
1134 my @files;
f37a9f48
CBW
1135
1136
e74f3fd4
JB
1137 ### Address: #43278: Explicitly tell Archive::Zip where to put the files:
1138 ### "In my BackPAN indexing, Archive::Zip was extracting things
1139 ### in my script's directory instead of the current working directory.
1140 ### I traced this back through Archive::Zip::_asLocalName which
1141 ### eventually calls File::Spec::Win32::rel2abs which on Windows might
1142 ### call Cwd::getdcwd. getdcwd returns the wrong directory in my
1143 ### case, even though I think I'm on the same drive.
f37a9f48 1144 ###
e74f3fd4
JB
1145 ### To fix this, I pass the optional second argument to
1146 ### extractMember using the cwd from Archive::Extract." --bdfoy
1147
1148 ## store cwd() before looping; calls to cwd() can be expensive, and
1149 ### it won't change during the loop
1150 my $extract_dir = cwd();
f37a9f48 1151
e74f3fd4 1152 ### have to extract every member individually ###
520c99e2
JB
1153 for my $member ($zip->members) {
1154 push @files, $member->{fileName};
1155
eadbb00b 1156 ### file to extract to, to avoid the above problem
e74f3fd4 1157 my $to = File::Spec->catfile( $extract_dir, $member->{fileName} );
f37a9f48 1158
e74f3fd4 1159 unless( $zip->extractMember($member, $to) == &Archive::Zip::AZ_OK ) {
520c99e2
JB
1160 return $self->_error(loc("Extraction of '%1' from '%2' failed",
1161 $member->{fileName}, $self->archive ));
1162 }
1163 }
1164
1165 my $dir = $self->__get_extract_dir( \@files );
1166
1167 ### set what files where extract, and where they went ###
1168 $self->files( \@files );
1169 $self->extract_path( File::Spec->rel2abs($dir) );
1170
1171 return 1;
1172}
1173
1174sub __get_extract_dir {
1175 my $self = shift;
1176 my $files = shift || [];
1177
1178 return unless scalar @$files;
1179
1180 my($dir1, $dir2);
1181 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
1182 my($dir,$pos) = @$aref;
1183
1184 ### add a catdir(), so that any trailing slashes get
1185 ### take care of (removed)
1186 ### also, a catdir() normalises './dir/foo' to 'dir/foo';
1187 ### which was the problem in bug #23999
1188 my $res = -d $files->[$pos]
1189 ? File::Spec->catdir( $files->[$pos], '' )
f37a9f48 1190 : File::Spec->catdir( dirname( $files->[$pos] ) );
520c99e2
JB
1191
1192 $$dir = $res;
1193 }
1194
f37a9f48 1195 ### if the first and last dir don't match, make sure the
520c99e2
JB
1196 ### dirname is not set wrongly
1197 my $dir;
f37a9f48 1198
520c99e2
JB
1199 ### dirs are the same, so we know for sure what the extract dir is
1200 if( $dir1 eq $dir2 ) {
1201 $dir = $dir1;
f37a9f48 1202
520c99e2
JB
1203 ### dirs are different.. do they share the base dir?
1204 ### if so, use that, if not, fall back to '.'
1205 } else {
1206 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1207 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
f37a9f48
CBW
1208
1209 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1210 }
520c99e2
JB
1211
1212 return File::Spec->rel2abs( $dir );
1213}
1214
1215#################################
1216#
1217# Bunzip2 code
1218#
1219#################################
1220
520c99e2
JB
1221sub _bunzip2_bin {
1222 my $self = shift;
1223
1224 ### check for /bin/gzip -- we need it ###
83285295
JB
1225 unless( $self->bin_bunzip2 ) {
1226 $self->_error(loc("No '%1' program found", '/bin/bunzip2'));
1227 return METHOD_NA;
f37a9f48 1228 }
520c99e2
JB
1229
1230 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1231 return $self->_error(loc("Could not open '%1' for writing: %2",
1232 $self->_gunzip_to, $! ));
f37a9f48 1233
9e5a0ef9
JB
1234 ### guard against broken bunzip2. See ->have_old_bunzip2()
1235 ### for details
1236 if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1237 return $self->_error(loc("Your bunzip2 version is too old and ".
1238 "can only extract files ending in '%1'",
1239 '.bz2'));
1240 }
520c99e2 1241
9e5a0ef9 1242 my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
520c99e2
JB
1243
1244 my $buffer;
1245 unless( scalar run( command => $cmd,
1246 verbose => $DEBUG,
1247 buffer => \$buffer )
1248 ) {
1249 return $self->_error(loc("Unable to bunzip2 '%1': %2",
1250 $self->archive, $buffer));
1251 }
1252
1253 ### no buffers available?
1254 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1255 $self->_error( $self->_no_buffer_content( $self->archive ) );
1256 }
f37a9f48 1257
35fe4187 1258 $self->_print($fh, $buffer) if defined $buffer;
520c99e2
JB
1259
1260 close $fh;
1261
1262 ### set what files where extract, and where they went ###
1263 $self->files( [$self->_gunzip_to] );
1264 $self->extract_path( File::Spec->rel2abs(cwd()) );
1265
1266 return 1;
1267}
1268
1269### using cz2, the compact versions... this we use mainly in archive::tar
1270### extractor..
1271# sub _bunzip2_cz1 {
1272# my $self = shift;
f37a9f48 1273#
520c99e2
JB
1274# my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1275# unless( can_load( modules => $use_list ) ) {
1276# return $self->_error(loc("You do not have '%1' installed - Please " .
1277# "install it as soon as possible.",
1278# 'IO::Uncompress::Bunzip2'));
1279# }
f37a9f48 1280#
520c99e2
JB
1281# my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1282# return $self->_error(loc("Unable to open '%1': %2",
1283# $self->archive,
1284# $IO::Uncompress::Bunzip2::Bunzip2Error));
f37a9f48 1285#
520c99e2
JB
1286# my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1287# return $self->_error(loc("Could not open '%1' for writing: %2",
1288# $self->_gunzip_to, $! ));
f37a9f48 1289#
520c99e2
JB
1290# my $buffer;
1291# $fh->print($buffer) while $bz->read($buffer) > 0;
1292# $fh->close;
f37a9f48 1293#
520c99e2
JB
1294# ### set what files where extract, and where they went ###
1295# $self->files( [$self->_gunzip_to] );
1296# $self->extract_path( File::Spec->rel2abs(cwd()) );
f37a9f48 1297#
520c99e2
JB
1298# return 1;
1299# }
1300
83285295 1301sub _bunzip2_bz2 {
520c99e2
JB
1302 my $self = shift;
1303
1304 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1305 unless( can_load( modules => $use_list ) ) {
83285295
JB
1306 $self->_error(loc("You do not have '%1' installed - Please " .
1307 "install it as soon as possible.",
1308 'IO::Uncompress::Bunzip2'));
f37a9f48 1309 return METHOD_NA;
520c99e2
JB
1310 }
1311
1312 IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1313 or return $self->_error(loc("Unable to uncompress '%1': %2",
1314 $self->archive,
1315 $IO::Uncompress::Bunzip2::Bunzip2Error));
1316
1317 ### set what files where extract, and where they went ###
1318 $self->files( [$self->_gunzip_to] );
1319 $self->extract_path( File::Spec->rel2abs(cwd()) );
1320
1321 return 1;
1322}
1323
d7f87992
CBW
1324#################################
1325#
1326# UnXz code
1327#
1328#################################
1329
1330sub _unxz_bin {
1331 my $self = shift;
1332
1333 ### check for /bin/unxz -- we need it ###
1334 unless( $self->bin_unxz ) {
1335 $self->_error(loc("No '%1' program found", '/bin/unxz'));
1336 return METHOD_NA;
1337 }
1338
1339 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1340 return $self->_error(loc("Could not open '%1' for writing: %2",
1341 $self->_gunzip_to, $! ));
1342
1343 my $cmd = [ $self->bin_unxz, '-cdf', $self->archive ];
1344
1345 my $buffer;
1346 unless( scalar run( command => $cmd,
1347 verbose => $DEBUG,
1348 buffer => \$buffer )
1349 ) {
1350 return $self->_error(loc("Unable to unxz '%1': %2",
1351 $self->archive, $buffer));
1352 }
1353
1354 ### no buffers available?
1355 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1356 $self->_error( $self->_no_buffer_content( $self->archive ) );
1357 }
1358
1359 $self->_print($fh, $buffer) if defined $buffer;
1360
1361 close $fh;
1362
1363 ### set what files where extract, and where they went ###
1364 $self->files( [$self->_gunzip_to] );
1365 $self->extract_path( File::Spec->rel2abs(cwd()) );
1366
1367 return 1;
1368}
1369
1370sub _unxz_cz {
1371 my $self = shift;
1372
1373 my $use_list = { 'IO::Uncompress::UnXz' => '0.0' };
1374 unless( can_load( modules => $use_list ) ) {
1375 $self->_error(loc("You do not have '%1' installed - Please " .
1376 "install it as soon as possible.",
1377 'IO::Uncompress::UnXz'));
f37a9f48 1378 return METHOD_NA;
d7f87992
CBW
1379 }
1380
1381 IO::Uncompress::UnXz::unxz($self->archive => $self->_gunzip_to)
1382 or return $self->_error(loc("Unable to uncompress '%1': %2",
1383 $self->archive,
1384 $IO::Uncompress::UnXz::UnXzError));
1385
1386 ### set what files where extract, and where they went ###
1387 $self->files( [$self->_gunzip_to] );
1388 $self->extract_path( File::Spec->rel2abs(cwd()) );
1389
1390 return 1;
1391}
1392
520c99e2
JB
1393
1394#################################
1395#
8d2ac73b
SP
1396# unlzma code
1397#
1398#################################
1399
8d2ac73b
SP
1400sub _unlzma_bin {
1401 my $self = shift;
1402
1403 ### check for /bin/unlzma -- we need it ###
83285295
JB
1404 unless( $self->bin_unlzma ) {
1405 $self->_error(loc("No '%1' program found", '/bin/unlzma'));
1406 return METHOD_NA;
f37a9f48 1407 }
8d2ac73b
SP
1408
1409 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1410 return $self->_error(loc("Could not open '%1' for writing: %2",
1411 $self->_gunzip_to, $! ));
1412
1413 my $cmd = [ $self->bin_unlzma, '-c', $self->archive ];
1414
1415 my $buffer;
1416 unless( scalar run( command => $cmd,
1417 verbose => $DEBUG,
1418 buffer => \$buffer )
1419 ) {
1420 return $self->_error(loc("Unable to unlzma '%1': %2",
1421 $self->archive, $buffer));
1422 }
1423
1424 ### no buffers available?
1425 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1426 $self->_error( $self->_no_buffer_content( $self->archive ) );
1427 }
1428
35fe4187 1429 $self->_print($fh, $buffer) if defined $buffer;
8d2ac73b
SP
1430
1431 close $fh;
1432
1433 ### set what files where extract, and where they went ###
1434 $self->files( [$self->_gunzip_to] );
1435 $self->extract_path( File::Spec->rel2abs(cwd()) );
1436
1437 return 1;
1438}
1439
1440sub _unlzma_cz {
1441 my $self = shift;
1442
d7f87992
CBW
1443 my $use_list1 = { 'IO::Uncompress::UnLzma' => '0.0' };
1444 my $use_list2 = { 'Compress::unLZMA' => '0.0' };
1445
1446 if (can_load( modules => $use_list1 ) ) {
1447 IO::Uncompress::UnLzma::unlzma($self->archive => $self->_gunzip_to)
1448 or return $self->_error(loc("Unable to uncompress '%1': %2",
1449 $self->archive,
1450 $IO::Uncompress::UnLzma::UnLzmaError));
8d2ac73b 1451 }
d7f87992 1452 elsif (can_load( modules => $use_list2 ) ) {
8d2ac73b 1453
d7f87992
CBW
1454 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1455 return $self->_error(loc("Could not open '%1' for writing: %2",
1456 $self->_gunzip_to, $! ));
8d2ac73b 1457
d7f87992
CBW
1458 my $buffer;
1459 $buffer = Compress::unLZMA::uncompressfile( $self->archive );
1460 unless ( defined $buffer ) {
1461 return $self->_error(loc("Could not unlzma '%1': %2",
1462 $self->archive, $@));
1463 }
8d2ac73b 1464
d7f87992 1465 $self->_print($fh, $buffer) if defined $buffer;
8d2ac73b 1466
d7f87992
CBW
1467 close $fh;
1468 }
1469 else {
1470 $self->_error(loc("You do not have '%1' or '%2' installed - Please " .
1471 "install it as soon as possible.", 'Compress::unLZMA', 'IO::Uncompress::UnLzma'));
f37a9f48 1472 return METHOD_NA;
d7f87992 1473 }
8d2ac73b
SP
1474
1475 ### set what files where extract, and where they went ###
1476 $self->files( [$self->_gunzip_to] );
1477 $self->extract_path( File::Spec->rel2abs(cwd()) );
1478
1479 return 1;
1480}
1481
1482#################################
1483#
520c99e2
JB
1484# Error code
1485#
1486#################################
1487
35fe4187
CBW
1488# For printing binaries that avoids interfering globals
1489sub _print {
1490 my $self = shift;
1491 my $fh = shift;
1492
1493 local( $\, $", $, ) = ( undef, ' ', '' );
1494 return print $fh @_;
1495}
1496
520c99e2
JB
1497sub _error {
1498 my $self = shift;
1499 my $error = shift;
83285295
JB
1500 my $lerror = Carp::longmess($error);
1501
1502 push @{$self->_error_msg}, $error;
1503 push @{$self->_error_msg_long}, $lerror;
f37a9f48 1504
520c99e2
JB
1505 ### set $Archive::Extract::WARN to 0 to disable printing
1506 ### of errors
1507 if( $WARN ) {
83285295 1508 carp $DEBUG ? $lerror : $error;
520c99e2
JB
1509 }
1510
1511 return;
1512}
1513
1514sub error {
1515 my $self = shift;
83285295
JB
1516
1517 ### make sure we have a fallback aref
f37a9f48
CBW
1518 my $aref = do {
1519 shift()
1520 ? $self->_error_msg_long
1521 : $self->_error_msg
83285295 1522 } || [];
f37a9f48 1523
83285295 1524 return join $/, @$aref;
520c99e2
JB
1525}
1526
a6696b92
CBW
1527=head2 debug( MESSAGE )
1528
1529This method outputs MESSAGE to the default filehandle if C<$DEBUG> is
1530true. It's a small method, but it's here if you'd like to subclass it
1531so you can so something else with any debugging output.
1532
1533=cut
1534
1535### this is really a stub for subclassing
1536sub debug {
1537 return unless $DEBUG;
1538
1539 print $_[1];
1540}
1541
520c99e2
JB
1542sub _no_buffer_files {
1543 my $self = shift;
1544 my $file = shift or return;
1545 return loc("No buffer captured, unable to tell ".
1546 "extracted files or extraction dir for '%1'", $file);
1547}
1548
1549sub _no_buffer_content {
1550 my $self = shift;
1551 my $file = shift or return;
1552 return loc("No buffer captured, unable to get content for '%1'", $file);
1553}
15541;
1555
1556=pod
1557
1558=head1 HOW IT WORKS
1559
1560C<Archive::Extract> tries first to determine what type of archive you
1561are passing it, by inspecting its suffix. It does not do this by using
1562Mime magic, or something related. See C<CAVEATS> below.
1563
1564Once it has determined the file type, it knows which extraction methods
1565it can use on the archive. It will try a perl solution first, then fall
1566back to a commandline tool if that fails. If that also fails, it will
1567return false, indicating it was unable to extract the archive.
1568See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1569
1570=head1 CAVEATS
1571
1572=head2 File Extensions
1573
1574C<Archive::Extract> trusts on the extension of the archive to determine
1575what type it is, and what extractor methods therefore can be used. If
1576your archives do not have any of the extensions as described in the
1577C<new()> method, you will have to specify the type explicitly, or
1578C<Archive::Extract> will not be able to extract the archive for you.
1579
9e5a0ef9
JB
1580=head2 Supporting Very Large Files
1581
1582C<Archive::Extract> can use either pure perl modules or command line
f37a9f48 1583programs under the hood. Some of the pure perl modules (like
8d2ac73b 1584C<Archive::Tar> and Compress::unLZMA) take the entire contents of the archive into memory,
9e5a0ef9
JB
1585which may not be feasible on your system. Consider setting the global
1586variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1587the use of command line programs and won't consume so much memory.
1588
1589See the C<GLOBAL VARIABLES> section below for details.
1590
1591=head2 Bunzip2 support of arbitrary extensions.
1592
f37a9f48 1593Older versions of C</bin/bunzip2> do not support arbitrary file
9e5a0ef9
JB
1594extensions and insist on a C<.bz2> suffix. Although we do our best
1595to guard against this, if you experience a bunzip2 error, it may
1596be related to this. For details, please see the C<have_old_bunzip2>
1597method.
1598
520c99e2
JB
1599=head1 GLOBAL VARIABLES
1600
1601=head2 $Archive::Extract::DEBUG
1602
1603Set this variable to C<true> to have all calls to command line tools
1604be printed out, including all their output.
1605This also enables C<Carp::longmess> errors, instead of the regular
1606C<carp> errors.
1607
1608Good for tracking down why things don't work with your particular
1609setup.
1610
1611Defaults to C<false>.
1612
1613=head2 $Archive::Extract::WARN
1614
1615This variable controls whether errors encountered internally by
1616C<Archive::Extract> should be C<carp>'d or not.
1617
1618Set to false to silence warnings. Inspect the output of the C<error()>
1619method manually to see what went wrong.
1620
1621Defaults to C<true>.
1622
1623=head2 $Archive::Extract::PREFER_BIN
1624
1625This variables controls whether C<Archive::Extract> should prefer the
1626use of perl modules, or commandline tools to extract archives.
1627
1628Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1629
1630Defaults to C<false>.
1631
0228b1bb 1632=head1 TODO / CAVEATS
520c99e2
JB
1633
1634=over 4
1635
1636=item Mime magic support
1637
1638Maybe this module should use something like C<File::Type> to determine
1639the type, rather than blindly trust the suffix.
1640
0228b1bb
JB
1641=item Thread safety
1642
1643Currently, C<Archive::Extract> does a C<chdir> to the extraction dir before
f37a9f48 1644extraction, and a C<chdir> back again after. This is not necessarily
0228b1bb
JB
1645thread safe. See C<rt.cpan.org> bug C<#45671> for details.
1646
1dae2fb5
RGS
1647=back
1648
574b415d 1649=head1 BUG REPORTS
520c99e2 1650
574b415d 1651Please report bugs or other issues to E<lt>bug-archive-extract@rt.cpan.org<gt>.
520c99e2 1652
574b415d
RGS
1653=head1 AUTHOR
1654
1655This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
520c99e2 1656
574b415d 1657=head1 COPYRIGHT
520c99e2 1658
f37a9f48 1659This library is free software; you may redistribute and/or modify it
574b415d 1660under the same terms as Perl itself.
520c99e2
JB
1661
1662=cut
1663
1664# Local variables:
1665# c-indentation-style: bsd
1666# c-basic-offset: 4
1667# indent-tabs-mode: nil
1668# End:
1669# vim: expandtab shiftwidth=4:
1670