This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Archive-Tar: customized version bump
[perl5.git] / cpan / Archive-Tar / lib / Archive / Tar.pm
CommitLineData
39713df4 1### the gnu tar specification:
f38c1908 2### http://www.gnu.org/software/tar/manual/tar.html
39713df4
RGS
3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
642eb381
SH
10use Cwd;
11use IO::Zlib;
12use IO::File;
13use Carp qw(carp croak);
14use File::Spec ();
15use File::Spec::Unix ();
16use File::Path ();
17
18use Archive::Tar::File;
19use Archive::Tar::Constant;
20
21require Exporter;
22
39713df4
RGS
23use strict;
24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
1c82faa7 25 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS
501bd44a 26 $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK
178aef9a
RGS
27 ];
28
642eb381 29@ISA = qw[Exporter];
bef46b70 30@EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP ];
178aef9a
RGS
31$DEBUG = 0;
32$WARN = 1;
33$FOLLOW_SYMLINK = 0;
fc5b2724 34$VERSION = "2.14_01";
178aef9a
RGS
35$CHOWN = 1;
36$CHMOD = 1;
1c82faa7 37$SAME_PERMISSIONS = $> == 0 ? 1 : 0;
178aef9a
RGS
38$DO_NOT_USE_PREFIX = 0;
39$INSECURE_EXTRACT_MODE = 0;
d33cd7cf 40$ZERO_PAD_NUMBERS = 0;
501bd44a 41$RESOLVE_SYMLINK = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed';
39713df4
RGS
42
43BEGIN {
44 use Config;
45 $HAS_PERLIO = $Config::Config{useperlio};
46
47 ### try and load IO::String anyway, so you can dynamically
48 ### switch between perlio and IO::String
642eb381 49 $HAS_IO_STRING = eval {
39713df4
RGS
50 require IO::String;
51 import IO::String;
642eb381
SH
52 1;
53 } || 0;
39713df4
RGS
54}
55
39713df4
RGS
56=head1 NAME
57
58Archive::Tar - module for manipulations of tar archives
59
60=head1 SYNOPSIS
61
62 use Archive::Tar;
63 my $tar = Archive::Tar->new;
64
642eb381 65 $tar->read('origin.tgz');
39713df4
RGS
66 $tar->extract();
67
68 $tar->add_files('file/foo.pl', 'docs/README');
69 $tar->add_data('file/baz.txt', 'This is the contents now');
70
71 $tar->rename('oldname', 'new/file/name');
93e94d8a
CBW
72 $tar->chown('/', 'root');
73 $tar->chown('/', 'root:root');
1e915955 74 $tar->chmod('/tmp', '1777');
39713df4 75
642eb381 76 $tar->write('files.tar'); # plain tar
bef46b70
JB
77 $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed
78 $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed
39713df4
RGS
79
80=head1 DESCRIPTION
81
82Archive::Tar provides an object oriented mechanism for handling tar
83files. It provides class methods for quick and easy files handling
84while also allowing for the creation of tar file objects for custom
85manipulation. If you have the IO::Zlib module installed,
86Archive::Tar will also support compressed or gzipped tar files.
87
88An object of class Archive::Tar represents a .tar(.gz) archive full
89of files and things.
90
91=head1 Object Methods
92
93=head2 Archive::Tar->new( [$file, $compressed] )
94
95Returns a new Tar object. If given any arguments, C<new()> calls the
96C<read()> method automatically, passing on the arguments provided to
97the C<read()> method.
98
99If C<new()> is invoked with arguments and the C<read()> method fails
100for any reason, C<new()> returns undef.
101
102=cut
103
104my $tmpl = {
105 _data => [ ],
106 _file => 'Unknown',
107};
108
109### install get/set accessors for this object.
110for my $key ( keys %$tmpl ) {
111 no strict 'refs';
112 *{__PACKAGE__."::$key"} = sub {
113 my $self = shift;
114 $self->{$key} = $_[0] if @_;
115 return $self->{$key};
116 }
117}
118
119sub new {
120 my $class = shift;
121 $class = ref $class if ref $class;
122
123 ### copying $tmpl here since a shallow copy makes it use the
124 ### same aref, causing for files to remain in memory always.
941cb2bb 125 my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class;
39713df4
RGS
126
127 if (@_) {
81a5970e
RGS
128 unless ( $obj->read( @_ ) ) {
129 $obj->_error(qq[No data could be read from file]);
130 return;
131 }
39713df4
RGS
132 }
133
134 return $obj;
135}
136
642eb381 137=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] )
39713df4
RGS
138
139Read the given tar file into memory.
140The first argument can either be the name of a file or a reference to
141an already open filehandle (or an IO::Zlib object if it's compressed)
39713df4
RGS
142
143The C<read> will I<replace> any previous content in C<$tar>!
144
e0d68803 145The second argument may be considered optional, but remains for
642eb381
SH
146backwards compatibility. Archive::Tar now looks at the file
147magic to determine what class should be used to open the file
148and will transparently Do The Right Thing.
149
150Archive::Tar will warn if you try to pass a bzip2 compressed file and the
151IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return.
39713df4 152
b3200c5d 153Note that you can currently B<not> pass a C<gzip> compressed
642eb381
SH
154filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed
155filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string
b3200c5d
SP
156containing the full archive information (either compressed or
157uncompressed). These are worth while features, but not currently
158implemented. See the C<TODO> section.
159
39713df4
RGS
160The third argument can be a hash reference with options. Note that
161all options are case-sensitive.
162
163=over 4
164
165=item limit
166
167Do not read more than C<limit> files. This is useful if you have
168very big archives, and are only interested in the first few files.
169
642eb381
SH
170=item filter
171
172Can be set to a regular expression. Only files with names that match
173the expression will be read.
174
f8c9502f
CBW
175=item md5
176
177Set to 1 and the md5sum of files will be returned (instead of file data)
178 my $iter = Archive::Tar->iter( $file, 1, {md5 => 1} );
179 while( my $f = $iter->() ) {
180 print $f->data . "\t" . $f->full_path . $/;
181 }
182
39713df4
RGS
183=item extract
184
185If set to true, immediately extract entries when reading them. This
186gives you the same memory break as the C<extract_archive> function.
187Note however that entries will not be read into memory, but written
e0d68803 188straight to disk. This means no C<Archive::Tar::File> objects are
642eb381 189created for you to inspect.
39713df4
RGS
190
191=back
192
193All files are stored internally as C<Archive::Tar::File> objects.
194Please consult the L<Archive::Tar::File> documentation for details.
195
196Returns the number of files read in scalar context, and a list of
197C<Archive::Tar::File> objects in list context.
198
199=cut
200
201sub read {
202 my $self = shift;
203 my $file = shift;
204 my $gzip = shift || 0;
205 my $opts = shift || {};
206
207 unless( defined $file ) {
208 $self->_error( qq[No file to read from!] );
209 return;
210 } else {
211 $self->_file( $file );
212 }
213
214 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
215 or return;
216
217 my $data = $self->_read_tar( $handle, $opts ) or return;
218
219 $self->_data( $data );
220
221 return wantarray ? @$data : scalar @$data;
222}
223
224sub _get_handle {
642eb381
SH
225 my $self = shift;
226 my $file = shift; return unless defined $file;
642eb381
SH
227 my $compress = shift || 0;
228 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only
229
afabe0e8
CBW
230 ### Check if file is a file handle or IO glob
231 if ( ref $file ) {
232 return $file if eval{ *$file{IO} };
233 return $file if eval{ $file->isa(q{IO::Handle}) };
234 $file = q{}.$file;
235 }
642eb381
SH
236
237 ### get a FH opened to the right class, so we can use it transparently
238 ### throughout the program
239 my $fh;
240 { ### reading magic only makes sense if we're opening a file for
241 ### reading. otherwise, just use what the user requested.
242 my $magic = '';
243 if( MODE_READ->($mode) ) {
244 open my $tmp, $file or do {
245 $self->_error( qq[Could not open '$file' for reading: $!] );
246 return;
247 };
e0d68803 248
642eb381
SH
249 ### read the first 4 bites of the file to figure out which class to
250 ### use to open the file.
e0d68803 251 sysread( $tmp, $magic, 4 );
642eb381
SH
252 close $tmp;
253 }
39713df4 254
642eb381
SH
255 ### is it bzip?
256 ### if you asked specifically for bzip compression, or if we're in
257 ### read mode and the magic numbers add up, use bzip
258 if( BZIP and (
e0d68803 259 ($compress eq COMPRESS_BZIP) or
642eb381
SH
260 ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM )
261 )
262 ) {
e0d68803 263
642eb381
SH
264 ### different reader/writer modules, different error vars... sigh
265 if( MODE_READ->($mode) ) {
266 $fh = IO::Uncompress::Bunzip2->new( $file ) or do {
267 $self->_error( qq[Could not read '$file': ] .
268 $IO::Uncompress::Bunzip2::Bunzip2Error
269 );
270 return;
271 };
e0d68803 272
642eb381
SH
273 } else {
274 $fh = IO::Compress::Bzip2->new( $file ) or do {
275 $self->_error( qq[Could not write to '$file': ] .
276 $IO::Compress::Bzip2::Bzip2Error
277 );
278 return;
279 };
280 }
e0d68803 281
642eb381
SH
282 ### is it gzip?
283 ### if you asked for compression, if you wanted to read or the gzip
284 ### magic number is present (redundant with read)
285 } elsif( ZLIB and (
286 $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM
e0d68803 287 )
642eb381
SH
288 ) {
289 $fh = IO::Zlib->new;
39713df4 290
642eb381
SH
291 unless( $fh->open( $file, $mode ) ) {
292 $self->_error(qq[Could not create filehandle for '$file': $!]);
293 return;
294 }
e0d68803 295
642eb381 296 ### is it plain tar?
39713df4 297 } else {
642eb381 298 $fh = IO::File->new;
39713df4 299
642eb381
SH
300 unless( $fh->open( $file, $mode ) ) {
301 $self->_error(qq[Could not create filehandle for '$file': $!]);
302 return;
303 }
39713df4 304
642eb381
SH
305 ### enable bin mode on tar archives
306 binmode $fh;
e0d68803 307 }
642eb381 308 }
39713df4
RGS
309
310 return $fh;
311}
312
642eb381 313
39713df4
RGS
314sub _read_tar {
315 my $self = shift;
316 my $handle = shift or return;
317 my $opts = shift || {};
318
319 my $count = $opts->{limit} || 0;
642eb381 320 my $filter = $opts->{filter};
f8c9502f 321 my $md5 = $opts->{md5} || 0; # cdrake
afabe0e8 322 my $filter_cb = $opts->{filter_cb};
39713df4
RGS
323 my $extract = $opts->{extract} || 0;
324
325 ### set a cap on the amount of files to extract ###
326 my $limit = 0;
327 $limit = 1 if $count > 0;
328
329 my $tarfile = [ ];
330 my $chunk;
331 my $read = 0;
332 my $real_name; # to set the name of a file when
333 # we're encountering @longlink
334 my $data;
335
336 LOOP:
337 while( $handle->read( $chunk, HEAD ) ) {
338 ### IO::Zlib doesn't support this yet
a1ba8df2
CBW
339 my $offset;
340 if ( ref($handle) ne 'IO::Zlib' ) {
341 local $@;
342 $offset = eval { tell $handle } || 'unknown';
343 $@ = '';
344 }
345 else {
346 $offset = 'unknown';
347 }
39713df4
RGS
348
349 unless( $read++ ) {
350 my $gzip = GZIP_MAGIC_NUM;
351 if( $chunk =~ /$gzip/ ) {
352 $self->_error( qq[Cannot read compressed format in tar-mode] );
353 return;
354 }
93e94d8a 355
4feb3b72
JB
356 ### size is < HEAD, which means a corrupted file, as the minimum
357 ### length is _at least_ HEAD
358 if (length $chunk != HEAD) {
359 $self->_error( qq[Cannot read enough bytes from the tarfile] );
360 return;
361 }
39713df4
RGS
362 }
363
364 ### if we can't read in all bytes... ###
365 last if length $chunk != HEAD;
366
367 ### Apparently this should really be two blocks of 512 zeroes,
368 ### but GNU tar sometimes gets it wrong. See comment in the
369 ### source code (tar.c) to GNU cpio.
370 next if $chunk eq TAR_END;
371
b30bcf62
RGS
372 ### according to the posix spec, the last 12 bytes of the header are
373 ### null bytes, to pad it to a 512 byte block. That means if these
eadbb00b 374 ### bytes are NOT null bytes, it's a corrupt header. See:
b30bcf62
RGS
375 ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
376 ### line 111
377 { my $nulls = join '', "\0" x 12;
378 unless( $nulls eq substr( $chunk, 500, 12 ) ) {
379 $self->_error( qq[Invalid header block at offset $offset] );
380 next LOOP;
381 }
382 }
383
81a5970e
RGS
384 ### pass the realname, so we can set it 'proper' right away
385 ### some of the heuristics are done on the name, so important
386 ### to set it ASAP
39713df4 387 my $entry;
81a5970e
RGS
388 { my %extra_args = ();
389 $extra_args{'name'} = $$real_name if defined $real_name;
e0d68803
JB
390
391 unless( $entry = Archive::Tar::File->new( chunk => $chunk,
392 %extra_args )
81a5970e
RGS
393 ) {
394 $self->_error( qq[Couldn't read chunk at offset $offset] );
b30bcf62 395 next LOOP;
81a5970e 396 }
39713df4
RGS
397 }
398
399 ### ignore labels:
d33cd7cf 400 ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159
39713df4
RGS
401 next if $entry->is_label;
402
403 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
404
405 if ( $entry->is_file && !$entry->validate ) {
406 ### sometimes the chunk is rather fux0r3d and a whole 512
c3745331 407 ### bytes ends up in the ->name area.
39713df4
RGS
408 ### clean it up, if need be
409 my $name = $entry->name;
410 $name = substr($name, 0, 100) if length $name > 100;
411 $name =~ s/\n/ /g;
412
413 $self->_error( $name . qq[: checksum error] );
414 next LOOP;
415 }
416
417 my $block = BLOCK_SIZE->( $entry->size );
418
419 $data = $entry->get_content_by_ref;
420
afabe0e8 421 my $skip = 0;
f8c9502f 422 my $ctx; # cdrake
afabe0e8 423 ### skip this entry if we're filtering
f8c9502f
CBW
424
425 if($md5) { # cdrake
426 $ctx = Digest::MD5->new; # cdrake
427 $skip=5; # cdrake
428
429 } elsif ($filter && $entry->name !~ $filter) {
afabe0e8
CBW
430 $skip = 1;
431
c465fd2f
CBW
432 } elsif ($filter_cb && ! $filter_cb->($entry)) {
433 $skip = 2;
434
afabe0e8
CBW
435 ### skip this entry if it's a pax header. This is a special file added
436 ### by, among others, git-generated tarballs. It holds comments and is
437 ### not meant for extracting. See #38932: pax_global_header extracted
c3e6accb 438 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
afabe0e8
CBW
439 $skip = 3;
440 }
441
442 if ($skip) {
443 #
444 # Since we're skipping, do not allocate memory for the
93e94d8a 445 # whole file. Read it 64 BLOCKS at a time. Do not
afabe0e8
CBW
446 # complete the skip yet because maybe what we read is a
447 # longlink and it won't get skipped after all
448 #
449 my $amt = $block;
f8c9502f 450 my $fsz=$entry->size; # cdrake
afabe0e8
CBW
451 while ($amt > 0) {
452 $$data = '';
453 my $this = 64 * BLOCK;
454 $this = $amt if $this > $amt;
455 if( $handle->read( $$data, $this ) < $this ) {
456 $self->_error( qq[Read error on tarfile (missing data) '].
457 $entry->full_path ."' at offset $offset" );
458 next LOOP;
459 }
460 $amt -= $this;
f8c9502f
CBW
461 $fsz -= $this; # cdrake
462 substr ($$data, $fsz) = "" if ($fsz<0); # remove external junk prior to md5 # cdrake
463 $ctx->add($$data) if($skip==5); # cdrake
afabe0e8 464 }
f8c9502f 465 $$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ; # cdrake
afabe0e8
CBW
466 } else {
467
468 ### just read everything into memory
469 ### can't do lazy loading since IO::Zlib doesn't support 'seek'
470 ### this is because Compress::Zlib doesn't support it =/
471 ### this reads in the whole data in one read() call.
472 if ( $handle->read( $$data, $block ) < $block ) {
473 $self->_error( qq[Read error on tarfile (missing data) '].
39713df4 474 $entry->full_path ."' at offset $offset" );
afabe0e8
CBW
475 next LOOP;
476 }
477 ### throw away trailing garbage ###
478 substr ($$data, $entry->size) = "" if defined $$data;
39713df4
RGS
479 }
480
39713df4
RGS
481 ### part II of the @LongLink munging -- need to do /after/
482 ### the checksum check.
483 if( $entry->is_longlink ) {
484 ### weird thing in tarfiles -- if the file is actually a
485 ### @LongLink, the data part seems to have a trailing ^@
486 ### (unprintable) char. to display, pipe output through less.
487 ### but that doesn't *always* happen.. so check if the last
488 ### character is a control character, and if so remove it
489 ### at any rate, we better remove that character here, or tests
14f3ea66 490 ### like 'eq' and hash lookups based on names will SO not work
39713df4
RGS
491 ### remove it by calculating the proper size, and then
492 ### tossing out everything that's longer than that size.
493
494 ### count number of nulls
495 my $nulls = $$data =~ tr/\0/\0/;
496
497 ### cut data + size by that many bytes
498 $entry->size( $entry->size - $nulls );
499 substr ($$data, $entry->size) = "";
500 }
501 }
502
503 ### clean up of the entries.. posix tar /apparently/ has some
504 ### weird 'feature' that allows for filenames > 255 characters
505 ### they'll put a header in with as name '././@LongLink' and the
506 ### contents will be the name of the /next/ file in the archive
507 ### pretty crappy and kludgy if you ask me
508
509 ### set the name for the next entry if this is a @LongLink;
510 ### this is one ugly hack =/ but needed for direct extraction
511 if( $entry->is_longlink ) {
512 $real_name = $data;
b30bcf62 513 next LOOP;
39713df4
RGS
514 } elsif ( defined $real_name ) {
515 $entry->name( $$real_name );
516 $entry->prefix('');
517 undef $real_name;
518 }
519
afabe0e8
CBW
520 if ($filter && $entry->name !~ $filter) {
521 next LOOP;
e0d68803 522
c465fd2f
CBW
523 } elsif ($filter_cb && ! $filter_cb->($entry)) {
524 next LOOP;
525
afabe0e8
CBW
526 ### skip this entry if it's a pax header. This is a special file added
527 ### by, among others, git-generated tarballs. It holds comments and is
528 ### not meant for extracting. See #38932: pax_global_header extracted
c3e6accb 529 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
afabe0e8 530 next LOOP;
afabe0e8 531 }
e0d68803 532
d33cd7cf
CBW
533 if ( $extract && !$entry->is_longlink
534 && !$entry->is_unknown
535 && !$entry->is_label ) {
536 $self->_extract_file( $entry ) or return;
537 }
39713df4
RGS
538
539 ### Guard against tarfiles with garbage at the end
540 last LOOP if $entry->name eq '';
541
542 ### push only the name on the rv if we're extracting
543 ### -- for extract_archive
544 push @$tarfile, ($extract ? $entry->name : $entry);
545
546 if( $limit ) {
547 $count-- unless $entry->is_longlink || $entry->is_dir;
548 last LOOP unless $count;
549 }
550 } continue {
551 undef $data;
552 }
553
554 return $tarfile;
555}
556
557=head2 $tar->contains_file( $filename )
558
559Check if the archive contains a certain file.
560It will return true if the file is in the archive, false otherwise.
561
562Note however, that this function does an exact match using C<eq>
563on the full path. So it cannot compensate for case-insensitive file-
564systems or compare 2 paths to see if they would point to the same
565underlying file.
566
567=cut
568
569sub contains_file {
570 my $self = shift;
01d11a1c 571 my $full = shift;
e0d68803 572
01d11a1c 573 return unless defined $full;
39713df4 574
c3745331
RGS
575 ### don't warn if the entry isn't there.. that's what this function
576 ### is for after all.
577 local $WARN = 0;
39713df4
RGS
578 return 1 if $self->_find_entry($full);
579 return;
580}
581
582=head2 $tar->extract( [@filenames] )
583
584Write files whose names are equivalent to any of the names in
585C<@filenames> to disk, creating subdirectories as necessary. This
586might not work too well under VMS.
587Under MacPerl, the file's modification time will be converted to the
588MacOS zero of time, and appropriate conversions will be done to the
589path. However, the length of each element of the path is not
590inspected to see whether it's longer than MacOS currently allows (32
591characters).
592
593If C<extract> is called without a list of file names, the entire
594contents of the archive are extracted.
595
596Returns a list of filenames extracted.
597
598=cut
599
600sub extract {
601 my $self = shift;
b30bcf62 602 my @args = @_;
39713df4
RGS
603 my @files;
604
f38c1908
SP
605 # use the speed optimization for all extracted files
606 local($self->{cwd}) = cwd() unless $self->{cwd};
607
eadbb00b 608 ### you requested the extraction of only certain files
b30bcf62
RGS
609 if( @args ) {
610 for my $file ( @args ) {
e0d68803 611
b30bcf62
RGS
612 ### it's already an object?
613 if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
614 push @files, $file;
615 next;
39713df4 616
b30bcf62
RGS
617 ### go find it then
618 } else {
e0d68803 619
b30bcf62
RGS
620 my $found;
621 for my $entry ( @{$self->_data} ) {
622 next unless $file eq $entry->full_path;
e0d68803 623
b30bcf62
RGS
624 ### we found the file you're looking for
625 push @files, $entry;
626 $found++;
627 }
e0d68803 628
b30bcf62 629 unless( $found ) {
e0d68803 630 return $self->_error(
b30bcf62
RGS
631 qq[Could not find '$file' in archive] );
632 }
39713df4
RGS
633 }
634 }
635
636 ### just grab all the file items
637 } else {
638 @files = $self->get_files;
639 }
640
641 ### nothing found? that's an error
642 unless( scalar @files ) {
643 $self->_error( qq[No files found for ] . $self->_file );
644 return;
645 }
646
647 ### now extract them
648 for my $entry ( @files ) {
649 unless( $self->_extract_file( $entry ) ) {
650 $self->_error(q[Could not extract ']. $entry->full_path .q['] );
651 return;
652 }
653 }
654
655 return @files;
656}
657
658=head2 $tar->extract_file( $file, [$extract_path] )
659
660Write an entry, whose name is equivalent to the file name provided to
48e76d2d 661disk. Optionally takes a second parameter, which is the full native
39713df4
RGS
662path (including filename) the entry will be written to.
663
664For example:
665
666 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
667
b30bcf62
RGS
668 $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' );
669
39713df4
RGS
670Returns true on success, false on failure.
671
672=cut
673
674sub extract_file {
675 my $self = shift;
01d11a1c 676 my $file = shift; return unless defined $file;
39713df4
RGS
677 my $alt = shift;
678
679 my $entry = $self->_find_entry( $file )
680 or $self->_error( qq[Could not find an entry for '$file'] ), return;
681
682 return $self->_extract_file( $entry, $alt );
683}
684
685sub _extract_file {
686 my $self = shift;
687 my $entry = shift or return;
688 my $alt = shift;
39713df4
RGS
689
690 ### you wanted an alternate extraction location ###
691 my $name = defined $alt ? $alt : $entry->full_path;
692
693 ### splitpath takes a bool at the end to indicate
694 ### that it's splitting a dir
7f10f74b
SH
695 my ($vol,$dirs,$file);
696 if ( defined $alt ) { # It's a local-OS path
697 ($vol,$dirs,$file) = File::Spec->splitpath( $alt,
698 $entry->is_dir );
699 } else {
700 ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
701 $entry->is_dir );
702 }
703
39713df4
RGS
704 my $dir;
705 ### is $name an absolute path? ###
642eb381 706 if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) {
178aef9a
RGS
707
708 ### absolute names are not allowed to be in tarballs under
709 ### strict mode, so only allow it if a user tells us to do it
710 if( not defined $alt and not $INSECURE_EXTRACT_MODE ) {
e0d68803 711 $self->_error(
178aef9a
RGS
712 q[Entry ']. $entry->full_path .q[' is an absolute path. ].
713 q[Not extracting absolute paths under SECURE EXTRACT MODE]
e0d68803 714 );
178aef9a
RGS
715 return;
716 }
e0d68803 717
178aef9a 718 ### user asked us to, it's fine.
642eb381 719 $dir = File::Spec->catpath( $vol, $dirs, "" );
39713df4
RGS
720
721 ### it's a relative path ###
722 } else {
e0d68803
JB
723 my $cwd = (ref $self and defined $self->{cwd})
724 ? $self->{cwd}
642eb381 725 : cwd();
f5afd28d 726
f5afd28d
NC
727 my @dirs = defined $alt
728 ? File::Spec->splitdir( $dirs ) # It's a local-OS path
729 : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely
730 # straight from the tarball
178aef9a 731
e0d68803
JB
732 if( not defined $alt and
733 not $INSECURE_EXTRACT_MODE
734 ) {
642eb381
SH
735
736 ### paths that leave the current directory are not allowed under
737 ### strict mode, so only allow it if a user tells us to do this.
738 if( grep { $_ eq '..' } @dirs ) {
e0d68803 739
642eb381
SH
740 $self->_error(
741 q[Entry ']. $entry->full_path .q[' is attempting to leave ].
742 q[the current working directory. Not extracting under ].
743 q[SECURE EXTRACT MODE]
744 );
745 return;
e0d68803
JB
746 }
747
642eb381
SH
748 ### the archive may be asking us to extract into a symlink. This
749 ### is not sane and a possible security issue, as outlined here:
750 ### https://rt.cpan.org/Ticket/Display.html?id=30380
751 ### https://bugzilla.redhat.com/show_bug.cgi?id=295021
752 ### https://issues.rpath.com/browse/RPL-1716
753 my $full_path = $cwd;
754 for my $d ( @dirs ) {
755 $full_path = File::Spec->catdir( $full_path, $d );
e0d68803 756
642eb381
SH
757 ### we've already checked this one, and it's safe. Move on.
758 next if ref $self and $self->{_link_cache}->{$full_path};
759
760 if( -l $full_path ) {
761 my $to = readlink $full_path;
762 my $diag = "symlinked directory ($full_path => $to)";
763
764 $self->_error(
765 q[Entry ']. $entry->full_path .q[' is attempting to ].
766 qq[extract to a $diag. This is considered a security ].
767 q[vulnerability and not allowed under SECURE EXTRACT ].
768 q[MODE]
769 );
770 return;
771 }
e0d68803 772
642eb381
SH
773 ### XXX keep a cache if possible, so the stats become cheaper:
774 $self->{_link_cache}->{$full_path} = 1 if ref $self;
775 }
776 }
777
2610e7a4
JB
778 ### '.' is the directory delimiter on VMS, which has to be escaped
779 ### or changed to '_' on vms. vmsify is used, because older versions
780 ### of vmspath do not handle this properly.
781 ### Must not add a '/' to an empty directory though.
e0d68803 782 map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS;
f5afd28d 783
e0d68803 784 my ($cwd_vol,$cwd_dir,$cwd_file)
48e76d2d
CB
785 = File::Spec->splitpath( $cwd );
786 my @cwd = File::Spec->splitdir( $cwd_dir );
787 push @cwd, $cwd_file if length $cwd_file;
81a5970e 788
eadbb00b 789 ### We need to pass '' as the last element to catpath. Craig Berry
f5afd28d 790 ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
e0d68803 791 ### The root problem is that splitpath on UNIX always returns the
f5afd28d
NC
792 ### final path element as a file even if it is a directory, and of
793 ### course there is no way it can know the difference without checking
794 ### against the filesystem, which it is documented as not doing. When
795 ### you turn around and call catpath, on VMS you have to know which bits
796 ### are directory bits and which bits are file bits. In this case we
797 ### know the result should be a directory. I had thought you could omit
798 ### the file argument to catpath in such a case, but apparently on UNIX
799 ### you can't.
e0d68803
JB
800 $dir = File::Spec->catpath(
801 $cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
f5afd28d
NC
802 );
803
e0d68803 804 ### catdir() returns undef if the path is longer than 255 chars on
2610e7a4 805 ### older VMS systems.
81a5970e
RGS
806 unless ( defined $dir ) {
807 $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
808 return;
809 }
810
39713df4
RGS
811 }
812
813 if( -e $dir && !-d _ ) {
814 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
815 return;
816 }
817
818 unless ( -d _ ) {
819 eval { File::Path::mkpath( $dir, 0, 0777 ) };
820 if( $@ ) {
642eb381
SH
821 my $fp = $entry->full_path;
822 $self->_error(qq[Could not create directory '$dir' for '$fp': $@]);
39713df4
RGS
823 return;
824 }
e0d68803 825
c3745331
RGS
826 ### XXX chown here? that might not be the same as in the archive
827 ### as we're only chown'ing to the owner of the file we're extracting
828 ### not to the owner of the directory itself, which may or may not
829 ### be another entry in the archive
830 ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
831 ### way to go.
832 #if( $CHOWN && CAN_CHOWN ) {
833 # chown $entry->uid, $entry->gid, $dir or
834 # $self->_error( qq[Could not set uid/gid on '$dir'] );
835 #}
39713df4
RGS
836 }
837
838 ### we're done if we just needed to create a dir ###
839 return 1 if $entry->is_dir;
840
841 my $full = File::Spec->catfile( $dir, $file );
842
843 if( $entry->is_unknown ) {
844 $self->_error( qq[Unknown file type for file '$full'] );
845 return;
846 }
847
848 if( length $entry->type && $entry->is_file ) {
849 my $fh = IO::File->new;
850 $fh->open( '>' . $full ) or (
851 $self->_error( qq[Could not open file '$full': $!] ),
852 return
853 );
854
855 if( $entry->size ) {
856 binmode $fh;
857 syswrite $fh, $entry->data or (
858 $self->_error( qq[Could not write data to '$full'] ),
859 return
860 );
861 }
862
863 close $fh or (
864 $self->_error( qq[Could not close file '$full'] ),
865 return
866 );
867
868 } else {
869 $self->_make_special_file( $entry, $full ) or return;
870 }
871
642eb381
SH
872 ### only update the timestamp if it's not a symlink; that will change the
873 ### timestamp of the original. This addresses bug #33669: Could not update
874 ### timestamp warning on symlinks
875 if( not -l $full ) {
876 utime time, $entry->mtime - TIME_OFFSET, $full or
877 $self->_error( qq[Could not update timestamp] );
878 }
39713df4 879
7d69032b 880 if( $CHOWN && CAN_CHOWN->() and not -l $full ) {
39713df4
RGS
881 chown $entry->uid, $entry->gid, $full or
882 $self->_error( qq[Could not set uid/gid on '$full'] );
883 }
884
885 ### only chmod if we're allowed to, but never chmod symlinks, since they'll
886 ### change the perms on the file they're linking too...
887 if( $CHMOD and not -l $full ) {
1c82faa7
JB
888 my $mode = $entry->mode;
889 unless ($SAME_PERMISSIONS) {
890 $mode &= ~(oct(7000) | umask);
891 }
892 chmod $mode, $full or
39713df4
RGS
893 $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
894 }
895
896 return 1;
897}
898
899sub _make_special_file {
900 my $self = shift;
901 my $entry = shift or return;
902 my $file = shift; return unless defined $file;
903
904 my $err;
905
906 if( $entry->is_symlink ) {
907 my $fail;
908 if( ON_UNIX ) {
909 symlink( $entry->linkname, $file ) or $fail++;
910
911 } else {
912 $self->_extract_special_file_as_plain_file( $entry, $file )
913 or $fail++;
914 }
915
642eb381
SH
916 $err = qq[Making symbolic link '$file' to '] .
917 $entry->linkname .q[' failed] if $fail;
39713df4
RGS
918
919 } elsif ( $entry->is_hardlink ) {
920 my $fail;
921 if( ON_UNIX ) {
922 link( $entry->linkname, $file ) or $fail++;
923
924 } else {
925 $self->_extract_special_file_as_plain_file( $entry, $file )
926 or $fail++;
927 }
928
929 $err = qq[Making hard link from '] . $entry->linkname .
930 qq[' to '$file' failed] if $fail;
931
932 } elsif ( $entry->is_fifo ) {
933 ON_UNIX && !system('mknod', $file, 'p') or
934 $err = qq[Making fifo ']. $entry->name .qq[' failed];
935
936 } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
937 my $mode = $entry->is_blockdev ? 'b' : 'c';
938
939 ON_UNIX && !system('mknod', $file, $mode,
940 $entry->devmajor, $entry->devminor) or
941 $err = qq[Making block device ']. $entry->name .qq[' (maj=] .
942 $entry->devmajor . qq[ min=] . $entry->devminor .
943 qq[) failed.];
944
945 } elsif ( $entry->is_socket ) {
946 ### the original doesn't do anything special for sockets.... ###
947 1;
948 }
949
950 return $err ? $self->_error( $err ) : 1;
951}
952
953### don't know how to make symlinks, let's just extract the file as
954### a plain file
955sub _extract_special_file_as_plain_file {
956 my $self = shift;
957 my $entry = shift or return;
958 my $file = shift; return unless defined $file;
959
960 my $err;
961 TRY: {
501bd44a 962 my $orig = $self->_find_entry( $entry->linkname, $entry );
39713df4
RGS
963
964 unless( $orig ) {
965 $err = qq[Could not find file '] . $entry->linkname .
966 qq[' in memory.];
967 last TRY;
968 }
969
970 ### clone the entry, make it appear as a normal file ###
501bd44a 971 my $clone = $orig->clone;
39713df4
RGS
972 $clone->_downgrade_to_plainfile;
973 $self->_extract_file( $clone, $file ) or last TRY;
974
975 return 1;
976 }
977
978 return $self->_error($err);
979}
980
981=head2 $tar->list_files( [\@properties] )
982
983Returns a list of the names of all the files in the archive.
984
985If C<list_files()> is passed an array reference as its first argument
986it returns a list of hash references containing the requested
987properties of each file. The following list of properties is
988supported: name, size, mtime (last modified date), mode, uid, gid,
989linkname, uname, gname, devmajor, devminor, prefix.
990
991Passing an array reference containing only one element, 'name', is
992special cased to return a list of names rather than a list of hash
993references, making it equivalent to calling C<list_files> without
994arguments.
995
996=cut
997
998sub list_files {
999 my $self = shift;
1000 my $aref = shift || [ ];
1001
1002 unless( $self->_data ) {
1003 $self->read() or return;
1004 }
1005
1006 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
1007 return map { $_->full_path } @{$self->_data};
1008 } else {
1009
1010 #my @rv;
1011 #for my $obj ( @{$self->_data} ) {
1012 # push @rv, { map { $_ => $obj->$_() } @$aref };
1013 #}
1014 #return @rv;
1015
1016 ### this does the same as the above.. just needs a +{ }
1017 ### to make sure perl doesn't confuse it for a block
1018 return map { my $o=$_;
1019 +{ map { $_ => $o->$_() } @$aref }
1020 } @{$self->_data};
1021 }
1022}
1023
1024sub _find_entry {
1025 my $self = shift;
1026 my $file = shift;
1027
1028 unless( defined $file ) {
1029 $self->_error( qq[No file specified] );
1030 return;
1031 }
1032
b30bcf62
RGS
1033 ### it's an object already
1034 return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
1035
501bd44a
CBW
1036seach_entry:
1037 if($self->_data){
1038 for my $entry ( @{$self->_data} ) {
1039 my $path = $entry->full_path;
1040 return $entry if $path eq $file;
1041 }
1042 }
1043
1044 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
1045 if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin )
1046 $file = _symlinks_resolver( $link_entry->name, $file );
1047 goto seach_entry if $self->_data;
1048
1049 #this will be slower than never, but won't failed!
1050
1051 my $iterargs = $link_entry->{'_archive'};
1052 if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){
1053 #faster but whole archive will be read in memory
1054 #read whole archive and share data
1055 my $archive = Archive::Tar->new;
1056 $archive->read( @$iterargs );
1057 push @$iterargs, $archive; #take a trace for destruction
1058 if($archive->_data){
1059 $self->_data( $archive->_data );
1060 goto seach_entry;
1061 }
1062 }#faster
1063
1064 {#slower but lower memory usage
1065 # $iterargs = [$filename, $compressed, $opts];
1066 my $next = Archive::Tar->iter( @$iterargs );
1067 while(my $e = $next->()){
1068 if($e->full_path eq $file){
1069 undef $next;
1070 return $e;
1071 }
1072 }
1073 }#slower
1074 }
1075 }
39713df4
RGS
1076
1077 $self->_error( qq[No such file in archive: '$file'] );
1078 return;
1079}
1080
1081=head2 $tar->get_files( [@filenames] )
1082
1083Returns the C<Archive::Tar::File> objects matching the filenames
1084provided. If no filename list was passed, all C<Archive::Tar::File>
1085objects in the current Tar object are returned.
1086
1087Please refer to the C<Archive::Tar::File> documentation on how to
1088handle these objects.
1089
1090=cut
1091
1092sub get_files {
1093 my $self = shift;
1094
1095 return @{ $self->_data } unless @_;
1096
1097 my @list;
1098 for my $file ( @_ ) {
1099 push @list, grep { defined } $self->_find_entry( $file );
1100 }
1101
1102 return @list;
1103}
1104
1105=head2 $tar->get_content( $file )
1106
1107Return the content of the named file.
1108
1109=cut
1110
1111sub get_content {
1112 my $self = shift;
1113 my $entry = $self->_find_entry( shift ) or return;
1114
1115 return $entry->data;
1116}
1117
1118=head2 $tar->replace_content( $file, $content )
1119
1120Make the string $content be the content for the file named $file.
1121
1122=cut
1123
1124sub replace_content {
1125 my $self = shift;
1126 my $entry = $self->_find_entry( shift ) or return;
1127
1128 return $entry->replace_content( shift );
1129}
1130
1131=head2 $tar->rename( $file, $new_name )
1132
1133Rename the file of the in-memory archive to $new_name.
1134
1135Note that you must specify a Unix path for $new_name, since per tar
1136standard, all files in the archive must be Unix paths.
1137
1138Returns true on success and false on failure.
1139
1140=cut
1141
1142sub rename {
1143 my $self = shift;
1144 my $file = shift; return unless defined $file;
1145 my $new = shift; return unless defined $new;
1146
1147 my $entry = $self->_find_entry( $file ) or return;
1148
1149 return $entry->rename( $new );
1150}
1151
1e915955
CBW
1152=head2 $tar->chmod( $file, $mode )
1153
1154Change mode of $file to $mode.
1155
1156Returns true on success and false on failure.
1157
1158=cut
1159
1160sub chmod {
1161 my $self = shift;
1162 my $file = shift; return unless defined $file;
1163 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
1164 my @args = ("$mode");
1165
1166 my $entry = $self->_find_entry( $file ) or return;
1167 my $x = $entry->chmod( @args );
1168 return $x;
1169}
1170
93e94d8a
CBW
1171=head2 $tar->chown( $file, $uname [, $gname] )
1172
1173Change owner $file to $uname and $gname.
1174
1175Returns true on success and false on failure.
1176
1177=cut
1178
1179sub chown {
1180 my $self = shift;
1181 my $file = shift; return unless defined $file;
1182 my $uname = shift; return unless defined $uname;
1183 my @args = ($uname);
1184 push(@args, shift);
1185
1186 my $entry = $self->_find_entry( $file ) or return;
1187 my $x = $entry->chown( @args );
1188 return $x;
1189}
1190
39713df4
RGS
1191=head2 $tar->remove (@filenamelist)
1192
1193Removes any entries with names matching any of the given filenames
1194from the in-memory archive. Returns a list of C<Archive::Tar::File>
1195objects that remain.
1196
1197=cut
1198
1199sub remove {
1200 my $self = shift;
1201 my @list = @_;
1202
1203 my %seen = map { $_->full_path => $_ } @{$self->_data};
1204 delete $seen{ $_ } for @list;
1205
1206 $self->_data( [values %seen] );
1207
1208 return values %seen;
1209}
1210
1211=head2 $tar->clear
1212
1213C<clear> clears the current in-memory archive. This effectively gives
1214you a 'blank' object, ready to be filled again. Note that C<clear>
1215only has effect on the object, not the underlying tarfile.
1216
1217=cut
1218
1219sub clear {
1220 my $self = shift or return;
1221
1222 $self->_data( [] );
1223 $self->_file( '' );
1224
1225 return 1;
1226}
1227
1228
1229=head2 $tar->write ( [$file, $compressed, $prefix] )
1230
1231Write the in-memory archive to disk. The first argument can either
1232be the name of a file or a reference to an already open filehandle (a
e0d68803 1233GLOB reference).
642eb381 1234
e0d68803 1235The second argument is used to indicate compression. You can either
642eb381 1236compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
e0d68803 1237to be the C<gzip> compression level (between 1 and 9), but the use of
eadbb00b 1238constants is preferred:
642eb381
SH
1239
1240 # write a gzip compressed file
bef46b70 1241 $tar->write( 'out.tgz', COMPRESS_GZIP );
642eb381 1242
e0d68803 1243 # write a bzip compressed file
bef46b70 1244 $tar->write( 'out.tbz', COMPRESS_BZIP );
39713df4
RGS
1245
1246Note that when you pass in a filehandle, the compression argument
1247is ignored, as all files are printed verbatim to your filehandle.
1248If you wish to enable compression with filehandles, use an
642eb381 1249C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
39713df4
RGS
1250
1251The third argument is an optional prefix. All files will be tucked
1252away in the directory you specify as prefix. So if you have files
1253'a' and 'b' in your archive, and you specify 'foo' as prefix, they
1254will be written to the archive as 'foo/a' and 'foo/b'.
1255
1256If no arguments are given, C<write> returns the entire formatted
1257archive as a string, which could be useful if you'd like to stuff the
1258archive into a socket or a pipe to gzip or something.
1259
642eb381 1260
39713df4
RGS
1261=cut
1262
1263sub write {
1264 my $self = shift;
1265 my $file = shift; $file = '' unless defined $file;
1266 my $gzip = shift || 0;
1267 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
1268 my $dummy = '';
e0d68803 1269
39713df4
RGS
1270 ### only need a handle if we have a file to print to ###
1271 my $handle = length($file)
1272 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
1273 or return )
1274 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h }
e0d68803 1275 : $HAS_IO_STRING ? IO::String->new
39713df4
RGS
1276 : __PACKAGE__->no_string_support();
1277
e0d68803
JB
1278 ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a
1279 ### corrupt TAR file. Must clear out $\ to make sure no garbage is
1280 ### printed to the archive
1281 local $\;
39713df4
RGS
1282
1283 for my $entry ( @{$self->_data} ) {
1284 ### entries to be written to the tarfile ###
1285 my @write_me;
1286
1287 ### only now will we change the object to reflect the current state
1288 ### of the name and prefix fields -- this needs to be limited to
1289 ### write() only!
1290 my $clone = $entry->clone;
1291
1292
e0d68803 1293 ### so, if you don't want use to use the prefix, we'll stuff
39713df4
RGS
1294 ### everything in the name field instead
1295 if( $DO_NOT_USE_PREFIX ) {
1296
1297 ### you might have an extended prefix, if so, set it in the clone
1298 ### XXX is ::Unix right?
1299 $clone->name( length $ext_prefix
1300 ? File::Spec::Unix->catdir( $ext_prefix,
1301 $clone->full_path)
1302 : $clone->full_path );
1303 $clone->prefix( '' );
1304
1305 ### otherwise, we'll have to set it properly -- prefix part in the
1306 ### prefix and name part in the name field.
1307 } else {
1308
1309 ### split them here, not before!
1310 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
1311
1312 ### you might have an extended prefix, if so, set it in the clone
1313 ### XXX is ::Unix right?
1314 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
1315 if length $ext_prefix;
1316
1317 $clone->prefix( $prefix );
1318 $clone->name( $name );
1319 }
1320
1321 ### names are too long, and will get truncated if we don't add a
1322 ### '@LongLink' file...
1323 my $make_longlink = ( length($clone->name) > NAME_LENGTH or
1324 length($clone->prefix) > PREFIX_LENGTH
1325 ) || 0;
1326
1327 ### perhaps we need to make a longlink file?
1328 if( $make_longlink ) {
1329 my $longlink = Archive::Tar::File->new(
1330 data => LONGLINK_NAME,
1331 $clone->full_path,
1332 { type => LONGLINK }
1333 );
1334
1335 unless( $longlink ) {
1336 $self->_error( qq[Could not create 'LongLink' entry for ] .
1337 qq[oversize file '] . $clone->full_path ."'" );
1338 return;
1339 };
1340
1341 push @write_me, $longlink;
1342 }
1343
1344 push @write_me, $clone;
1345
1346 ### write the one, optionally 2 a::t::file objects to the handle
1347 for my $clone (@write_me) {
1348
1349 ### if the file is a symlink, there are 2 options:
1350 ### either we leave the symlink intact, but then we don't write any
1351 ### data OR we follow the symlink, which means we actually make a
1352 ### copy. if we do the latter, we have to change the TYPE of the
1353 ### clone to 'FILE'
1354 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1355 my $data_ok = !$clone->is_symlink && $clone->has_content;
1356
1357 ### downgrade to a 'normal' file if it's a symlink we're going to
1358 ### treat as a regular file
1359 $clone->_downgrade_to_plainfile if $link_ok;
1360
1361 ### get the header for this block
1362 my $header = $self->_format_tar_entry( $clone );
1363 unless( $header ) {
1364 $self->_error(q[Could not format header for: ] .
1365 $clone->full_path );
1366 return;
1367 }
1368
1369 unless( print $handle $header ) {
1370 $self->_error(q[Could not write header for: ] .
1371 $clone->full_path);
1372 return;
1373 }
1374
1375 if( $link_ok or $data_ok ) {
1376 unless( print $handle $clone->data ) {
1377 $self->_error(q[Could not write data for: ] .
1378 $clone->full_path);
1379 return;
1380 }
1381
1382 ### pad the end of the clone if required ###
1383 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1384 }
1385
1386 } ### done writing these entries
1387 }
1388
1389 ### write the end markers ###
1390 print $handle TAR_END x 2 or
1391 return $self->_error( qq[Could not write tar end markers] );
b30bcf62 1392
39713df4 1393 ### did you want it written to a file, or returned as a string? ###
b30bcf62 1394 my $rv = length($file) ? 1
39713df4 1395 : $HAS_PERLIO ? $dummy
b30bcf62
RGS
1396 : do { seek $handle, 0, 0; local $/; <$handle> };
1397
d33cd7cf 1398 ### make sure to close the handle if we created it
afabe0e8
CBW
1399 if ( $file ne $handle ) {
1400 unless( close $handle ) {
1401 $self->_error( qq[Could not write tar] );
1402 return;
1403 }
1404 }
e0d68803 1405
b30bcf62 1406 return $rv;
39713df4
RGS
1407}
1408
1409sub _format_tar_entry {
1410 my $self = shift;
1411 my $entry = shift or return;
1412 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
1413 my $no_prefix = shift || 0;
1414
1415 my $file = $entry->name;
1416 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix;
1417
1418 ### remove the prefix from the file name
eadbb00b 1419 ### not sure if this is still needed --kane
39713df4
RGS
1420 ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1421 ### this for us. Even worse, this would break if we tried to add a file
1422 ### like x/x.
1423 #if( length $prefix ) {
1424 # $file =~ s/^$match//;
1425 #}
1426
1427 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1428 if length $ext_prefix;
1429
1430 ### not sure why this is... ###
1431 my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1432 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1433
d33cd7cf 1434 my $f1 = "%06o"; my $f2 = $ZERO_PAD_NUMBERS ? "%011o" : "%11o";
39713df4
RGS
1435
1436 ### this might be optimizable with a 'changed' flag in the file objects ###
1437 my $tar = pack (
1438 PACK,
1439 $file,
1440
1441 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1442 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1443
1444 "", # checksum field - space padded a bit down
1445
1446 (map { $entry->$_() } qw[type linkname magic]),
1447
1448 $entry->version || TAR_VERSION,
1449
1450 (map { $entry->$_() } qw[uname gname]),
1451 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1452
1453 ($no_prefix ? '' : $prefix)
1454 );
1455
1456 ### add the checksum ###
d33cd7cf 1457 my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0";
39713df4
RGS
1458 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1459
1460 return $tar;
1461}
1462
1463=head2 $tar->add_files( @filenamelist )
1464
1465Takes a list of filenames and adds them to the in-memory archive.
1466
1467The path to the file is automatically converted to a Unix like
1468equivalent for use in the archive, and, if on MacOS, the file's
1469modification time is converted from the MacOS epoch to the Unix epoch.
1470So tar archives created on MacOS with B<Archive::Tar> can be read
1471both with I<tar> on Unix and applications like I<suntar> or
1472I<Stuffit Expander> on MacOS.
1473
1474Be aware that the file's type/creator and resource fork will be lost,
1475which is usually what you want in cross-platform archives.
1476
2610e7a4
JB
1477Instead of a filename, you can also pass it an existing C<Archive::Tar::File>
1478object from, for example, another archive. The object will be clone, and
1479effectively be a copy of the original, not an alias.
1480
39713df4
RGS
1481Returns a list of C<Archive::Tar::File> objects that were just added.
1482
1483=cut
1484
1485sub add_files {
1486 my $self = shift;
1487 my @files = @_ or return;
1488
1489 my @rv;
1490 for my $file ( @files ) {
2610e7a4
JB
1491
1492 ### you passed an Archive::Tar::File object
1493 ### clone it so we don't accidentally have a reference to
1494 ### an object from another archive
1495 if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) {
e0d68803 1496 push @rv, $file->clone;
2610e7a4
JB
1497 next;
1498 }
e0d68803 1499
a1ba8df2
CBW
1500 eval {
1501 if( utf8::is_utf8( $file )) {
1502 utf8::encode( $file );
1503 }
1504 };
1505
c3745331 1506 unless( -e $file || -l $file ) {
39713df4
RGS
1507 $self->_error( qq[No such file: '$file'] );
1508 next;
1509 }
1510
1511 my $obj = Archive::Tar::File->new( file => $file );
1512 unless( $obj ) {
1513 $self->_error( qq[Unable to add file: '$file'] );
1514 next;
1515 }
1516
1517 push @rv, $obj;
1518 }
1519
1520 push @{$self->{_data}}, @rv;
1521
1522 return @rv;
1523}
1524
1525=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1526
1527Takes a filename, a scalar full of data and optionally a reference to
1528a hash with specific options.
1529
1530Will add a file to the in-memory archive, with name C<$filename> and
1531content C<$data>. Specific properties can be set using C<$opthashref>.
1532The following list of properties is supported: name, size, mtime
1533(last modified date), mode, uid, gid, linkname, uname, gname,
b3200c5d 1534devmajor, devminor, prefix, type. (On MacOS, the file's path and
39713df4
RGS
1535modification times are converted to Unix equivalents.)
1536
5ac8c252
CBW
1537Valid values for the file type are the following constants defined by
1538Archive::Tar::Constant:
b3200c5d
SP
1539
1540=over 4
1541
1542=item FILE
1543
1544Regular file.
1545
1546=item HARDLINK
1547
1548=item SYMLINK
1549
1550Hard and symbolic ("soft") links; linkname should specify target.
1551
1552=item CHARDEV
1553
1554=item BLOCKDEV
1555
1556Character and block devices. devmajor and devminor should specify the major
1557and minor device numbers.
1558
1559=item DIR
1560
1561Directory.
1562
1563=item FIFO
1564
1565FIFO (named pipe).
1566
1567=item SOCKET
1568
1569Socket.
1570
1571=back
1572
39713df4
RGS
1573Returns the C<Archive::Tar::File> object that was just added, or
1574C<undef> on failure.
1575
1576=cut
1577
1578sub add_data {
1579 my $self = shift;
1580 my ($file, $data, $opt) = @_;
1581
1582 my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1583 unless( $obj ) {
1584 $self->_error( qq[Unable to add file: '$file'] );
1585 return;
1586 }
1587
1588 push @{$self->{_data}}, $obj;
1589
1590 return $obj;
1591}
1592
1593=head2 $tar->error( [$BOOL] )
1594
14f3ea66 1595Returns the current error string (usually, the last error reported).
39713df4
RGS
1596If a true value was specified, it will give the C<Carp::longmess>
1597equivalent of the error, in effect giving you a stacktrace.
1598
1599For backwards compatibility, this error is also available as
1600C<$Archive::Tar::error> although it is much recommended you use the
1601method call instead.
1602
1603=cut
1604
1605{
1606 $error = '';
1607 my $longmess;
1608
1609 sub _error {
1610 my $self = shift;
1611 my $msg = $error = shift;
1612 $longmess = Carp::longmess($error);
941cb2bb
CBW
1613 if (ref $self) {
1614 $self->{_error} = $error;
1615 $self->{_longmess} = $longmess;
1616 }
39713df4
RGS
1617
1618 ### set Archive::Tar::WARN to 0 to disable printing
1619 ### of errors
1620 if( $WARN ) {
1621 carp $DEBUG ? $longmess : $msg;
1622 }
1623
1624 return;
1625 }
1626
1627 sub error {
1628 my $self = shift;
941cb2bb
CBW
1629 if (ref $self) {
1630 return shift() ? $self->{_longmess} : $self->{_error};
1631 } else {
1632 return shift() ? $longmess : $error;
1633 }
39713df4
RGS
1634 }
1635}
1636
f38c1908
SP
1637=head2 $tar->setcwd( $cwd );
1638
1639C<Archive::Tar> needs to know the current directory, and it will run
e0d68803 1640C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
f38c1908 1641tarfile and saves it in the file system. (As of version 1.30, however,
e0d68803 1642C<Archive::Tar> will use the speed optimization described below
f38c1908
SP
1643automatically, so it's only relevant if you're using C<extract_file()>).
1644
1645Since C<Archive::Tar> doesn't change the current directory internally
1646while it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1647can be avoided if we can guarantee that the current directory doesn't
1648get changed externally.
1649
1650To use this performance boost, set the current directory via
1651
1652 use Cwd;
1653 $tar->setcwd( cwd() );
1654
1655once before calling a function like C<extract_file> and
1656C<Archive::Tar> will use the current directory setting from then on
e0d68803 1657and won't call C<Cwd::cwd()> internally.
f38c1908
SP
1658
1659To switch back to the default behaviour, use
1660
1661 $tar->setcwd( undef );
1662
1663and C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1664
eadbb00b 1665If you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will
f38c1908
SP
1666be called for you.
1667
e0d68803 1668=cut
f38c1908
SP
1669
1670sub setcwd {
1671 my $self = shift;
1672 my $cwd = shift;
1673
1674 $self->{cwd} = $cwd;
1675}
39713df4 1676
39713df4
RGS
1677=head1 Class Methods
1678
642eb381 1679=head2 Archive::Tar->create_archive($file, $compressed, @filelist)
39713df4
RGS
1680
1681Creates a tar file from the list of files provided. The first
1682argument can either be the name of the tar file to create or a
1683reference to an open file handle (e.g. a GLOB reference).
1684
e0d68803 1685The second argument is used to indicate compression. You can either
642eb381 1686compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
e0d68803 1687to be the C<gzip> compression level (between 1 and 9), but the use of
eadbb00b 1688constants is preferred:
642eb381
SH
1689
1690 # write a gzip compressed file
bef46b70 1691 Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist );
642eb381 1692
e0d68803 1693 # write a bzip compressed file
bef46b70 1694 Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist );
39713df4
RGS
1695
1696Note that when you pass in a filehandle, the compression argument
1697is ignored, as all files are printed verbatim to your filehandle.
1698If you wish to enable compression with filehandles, use an
642eb381 1699C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
39713df4
RGS
1700
1701The remaining arguments list the files to be included in the tar file.
1702These files must all exist. Any files which don't exist or can't be
1703read are silently ignored.
1704
1705If the archive creation fails for any reason, C<create_archive> will
1706return false. Please use the C<error> method to find the cause of the
1707failure.
1708
1709Note that this method does not write C<on the fly> as it were; it
1710still reads all the files into memory before writing out the archive.
1711Consult the FAQ below if this is a problem.
1712
1713=cut
1714
1715sub create_archive {
1716 my $class = shift;
1717
1718 my $file = shift; return unless defined $file;
1719 my $gzip = shift || 0;
1720 my @files = @_;
1721
1722 unless( @files ) {
1723 return $class->_error( qq[Cowardly refusing to create empty archive!] );
1724 }
1725
1726 my $tar = $class->new;
1727 $tar->add_files( @files );
1728 return $tar->write( $file, $gzip );
1729}
1730
642eb381
SH
1731=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] )
1732
1733Returns an iterator function that reads the tar file without loading
1734it all in memory. Each time the function is called it will return the
1735next file in the tarball. The files are returned as
1736C<Archive::Tar::File> objects. The iterator function returns the
941cb2bb 1737empty list once it has exhausted the files contained.
642eb381
SH
1738
1739The second argument can be a hash reference with options, which are
1740identical to the arguments passed to C<read()>.
1741
1742Example usage:
1743
1744 my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} );
1745
1746 while( my $f = $next->() ) {
1747 print $f->name, "\n";
1748
1749 $f->extract or warn "Extraction failed";
e0d68803 1750
642eb381
SH
1751 # ....
1752 }
1753
1754=cut
1755
1756
1757sub iter {
1758 my $class = shift;
1759 my $filename = shift or return;
93e94d8a 1760 my $compressed = shift || 0;
642eb381
SH
1761 my $opts = shift || {};
1762
1763 ### get a handle to read from.
1764 my $handle = $class->_get_handle(
e0d68803
JB
1765 $filename,
1766 $compressed,
642eb381
SH
1767 READ_ONLY->( ZLIB )
1768 ) or return;
1769
1770 my @data;
501bd44a 1771 my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ];
642eb381
SH
1772 return sub {
1773 return shift(@data) if @data; # more than one file returned?
1774 return unless $handle; # handle exhausted?
1775
1776 ### read data, should only return file
941cb2bb
CBW
1777 my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 });
1778 @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY';
501bd44a
CBW
1779 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
1780 foreach(@data){
1781 #may refine this heuristic for ON_UNIX?
1782 if($_->linkname){
1783 #is there a better slot to store/share it ?
1784 $_->{'_archive'} = $CONSTRUCT_ARGS;
1785 }
1786 }
1787 }
642eb381
SH
1788
1789 ### return one piece of data
1790 return shift(@data) if @data;
e0d68803 1791
642eb381
SH
1792 ### data is exhausted, free the filehandle
1793 undef $handle;
501bd44a
CBW
1794 if(@$CONSTRUCT_ARGS == 4){
1795 #free archive in memory
1796 undef $CONSTRUCT_ARGS->[-1];
1797 }
642eb381
SH
1798 return;
1799 };
1800}
1801
1802=head2 Archive::Tar->list_archive($file, $compressed, [\@properties])
39713df4
RGS
1803
1804Returns a list of the names of all the files in the archive. The
1805first argument can either be the name of the tar file to list or a
1806reference to an open file handle (e.g. a GLOB reference).
1807
1808If C<list_archive()> is passed an array reference as its third
1809argument it returns a list of hash references containing the requested
1810properties of each file. The following list of properties is
e0d68803 1811supported: full_path, name, size, mtime (last modified date), mode,
5ac8c252 1812uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type.
b3200c5d
SP
1813
1814See C<Archive::Tar::File> for details about supported properties.
39713df4
RGS
1815
1816Passing an array reference containing only one element, 'name', is
1817special cased to return a list of names rather than a list of hash
1818references.
1819
1820=cut
1821
1822sub list_archive {
1823 my $class = shift;
1824 my $file = shift; return unless defined $file;
1825 my $gzip = shift || 0;
1826
1827 my $tar = $class->new($file, $gzip);
1828 return unless $tar;
1829
1830 return $tar->list_files( @_ );
1831}
1832
642eb381 1833=head2 Archive::Tar->extract_archive($file, $compressed)
39713df4
RGS
1834
1835Extracts the contents of the tar file. The first argument can either
1836be the name of the tar file to create or a reference to an open file
1837handle (e.g. a GLOB reference). All relative paths in the tar file will
1838be created underneath the current working directory.
1839
1840C<extract_archive> will return a list of files it extracted.
1841If the archive extraction fails for any reason, C<extract_archive>
1842will return false. Please use the C<error> method to find the cause
1843of the failure.
1844
1845=cut
1846
1847sub extract_archive {
1848 my $class = shift;
1849 my $file = shift; return unless defined $file;
1850 my $gzip = shift || 0;
1851
1852 my $tar = $class->new( ) or return;
1853
1854 return $tar->read( $file, $gzip, { extract => 1 } );
1855}
1856
f5695358
JB
1857=head2 $bool = Archive::Tar->has_io_string
1858
1859Returns true if we currently have C<IO::String> support loaded.
1860
e0d68803 1861Either C<IO::String> or C<perlio> support is needed to support writing
f5695358
JB
1862stringified archives. Currently, C<perlio> is the preferred method, if
1863available.
1864
1865See the C<GLOBAL VARIABLES> section to see how to change this preference.
1866
1867=cut
1868
1869sub has_io_string { return $HAS_IO_STRING; }
1870
1871=head2 $bool = Archive::Tar->has_perlio
1872
1873Returns true if we currently have C<perlio> support loaded.
1874
e0d68803 1875This requires C<perl-5.8> or higher, compiled with C<perlio>
f5695358 1876
e0d68803 1877Either C<IO::String> or C<perlio> support is needed to support writing
f5695358
JB
1878stringified archives. Currently, C<perlio> is the preferred method, if
1879available.
1880
1881See the C<GLOBAL VARIABLES> section to see how to change this preference.
1882
1883=cut
1884
1885sub has_perlio { return $HAS_PERLIO; }
1886
1887=head2 $bool = Archive::Tar->has_zlib_support
1888
1889Returns true if C<Archive::Tar> can extract C<zlib> compressed archives
1890
1891=cut
1892
1893sub has_zlib_support { return ZLIB }
1894
1895=head2 $bool = Archive::Tar->has_bzip2_support
1896
1897Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives
1898
1899=cut
1900
1901sub has_bzip2_support { return BZIP }
1902
39713df4
RGS
1903=head2 Archive::Tar->can_handle_compressed_files
1904
1905A simple checking routine, which will return true if C<Archive::Tar>
642eb381
SH
1906is able to uncompress compressed archives on the fly with C<IO::Zlib>
1907and C<IO::Compress::Bzip2> or false if not both are installed.
39713df4
RGS
1908
1909You can use this as a shortcut to determine whether C<Archive::Tar>
1910will do what you think before passing compressed archives to its
1911C<read> method.
1912
1913=cut
1914
642eb381 1915sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 }
39713df4
RGS
1916
1917sub no_string_support {
1918 croak("You have to install IO::String to support writing archives to strings");
1919}
1920
501bd44a
CBW
1921sub _symlinks_resolver{
1922 my ($src, $trg) = @_;
1923 my @src = split /[\/\\]/, $src;
1924 my @trg = split /[\/\\]/, $trg;
1925 pop @src; #strip out current object name
1926 if(@trg and $trg[0] eq ''){
1927 shift @trg;
1928 #restart path from scratch
1929 @src = ( );
1930 }
1931 foreach my $part ( @trg ){
1932 next if $part eq '.'; #ignore current
1933 if($part eq '..'){
1934 #got to parent
1935 pop @src;
1936 }
1937 else{
1938 #append it
1939 push @src, $part;
1940 }
1941 }
1942 my $path = join('/', @src);
1943 warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG;
1944 return $path;
1945}
1946
39713df4
RGS
19471;
1948
1949__END__
1950
1951=head1 GLOBAL VARIABLES
1952
1953=head2 $Archive::Tar::FOLLOW_SYMLINK
1954
1955Set this variable to C<1> to make C<Archive::Tar> effectively make a
1956copy of the file when extracting. Default is C<0>, which
1957means the symlink stays intact. Of course, you will have to pack the
1958file linked to as well.
1959
1960This option is checked when you write out the tarfile using C<write>
1961or C<create_archive>.
1962
1963This works just like C</bin/tar>'s C<-h> option.
1964
1965=head2 $Archive::Tar::CHOWN
1966
1967By default, C<Archive::Tar> will try to C<chown> your files if it is
1968able to. In some cases, this may not be desired. In that case, set
1969this variable to C<0> to disable C<chown>-ing, even if it were
1970possible.
1971
1972The default is C<1>.
1973
1974=head2 $Archive::Tar::CHMOD
1975
1976By default, C<Archive::Tar> will try to C<chmod> your files to
1977whatever mode was specified for the particular file in the archive.
1978In some cases, this may not be desired. In that case, set this
1979variable to C<0> to disable C<chmod>-ing.
1980
1981The default is C<1>.
1982
1c82faa7
JB
1983=head2 $Archive::Tar::SAME_PERMISSIONS
1984
1985When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether
1986the permissions on files from the archive are used without modification
1987of if they are filtered by removing any setid bits and applying the
1988current umask.
1989
1990The default is C<1> for the root user and C<0> for normal users.
1991
39713df4
RGS
1992=head2 $Archive::Tar::DO_NOT_USE_PREFIX
1993
e0d68803 1994By default, C<Archive::Tar> will try to put paths that are over
f38c1908 1995100 characters in the C<prefix> field of your tar header, as
e0d68803
JB
1996defined per POSIX-standard. However, some (older) tar programs
1997do not implement this spec. To retain compatibility with these older
1998or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
1999variable to a true value, and C<Archive::Tar> will use an alternate
2000way of dealing with paths over 100 characters by using the
f38c1908
SP
2001C<GNU Extended Header> feature.
2002
2003Note that clients who do not support the C<GNU Extended Header>
2004feature will not be able to read these archives. Such clients include
2005tars on C<Solaris>, C<Irix> and C<AIX>.
39713df4
RGS
2006
2007The default is C<0>.
2008
2009=head2 $Archive::Tar::DEBUG
2010
2011Set this variable to C<1> to always get the C<Carp::longmess> output
2012of the warnings, instead of the regular C<carp>. This is the same
2013message you would get by doing:
2014
2015 $tar->error(1);
2016
2017Defaults to C<0>.
2018
2019=head2 $Archive::Tar::WARN
2020
2021Set this variable to C<0> if you do not want any warnings printed.
2022Personally I recommend against doing this, but people asked for the
2023option. Also, be advised that this is of course not threadsafe.
2024
2025Defaults to C<1>.
2026
2027=head2 $Archive::Tar::error
2028
2029Holds the last reported error. Kept for historical reasons, but its
2030use is very much discouraged. Use the C<error()> method instead:
2031
2032 warn $tar->error unless $tar->extract;
2033
941cb2bb
CBW
2034Note that in older versions of this module, the C<error()> method
2035would return an effectively global value even when called an instance
2036method as above. This has since been fixed, and multiple instances of
2037C<Archive::Tar> now have separate error strings.
2038
178aef9a
RGS
2039=head2 $Archive::Tar::INSECURE_EXTRACT_MODE
2040
2041This variable indicates whether C<Archive::Tar> should allow
2042files to be extracted outside their current working directory.
2043
2044Allowing this could have security implications, as a malicious
2045tar archive could alter or replace any file the extracting user
e0d68803
JB
2046has permissions to. Therefor, the default is to not allow
2047insecure extractions.
178aef9a 2048
e0d68803
JB
2049If you trust the archive, or have other reasons to allow the
2050archive to write files outside your current working directory,
178aef9a
RGS
2051set this variable to C<true>.
2052
2053Note that this is a backwards incompatible change from version
2054C<1.36> and before.
2055
39713df4
RGS
2056=head2 $Archive::Tar::HAS_PERLIO
2057
e0d68803 2058This variable holds a boolean indicating if we currently have
39713df4 2059C<perlio> support loaded. This will be enabled for any perl
e0d68803 2060greater than C<5.8> compiled with C<perlio>.
39713df4
RGS
2061
2062If you feel strongly about disabling it, set this variable to
2063C<false>. Note that you will then need C<IO::String> installed
2064to support writing stringified archives.
2065
2066Don't change this variable unless you B<really> know what you're
2067doing.
2068
2069=head2 $Archive::Tar::HAS_IO_STRING
2070
e0d68803 2071This variable holds a boolean indicating if we currently have
39713df4
RGS
2072C<IO::String> support loaded. This will be enabled for any perl
2073that has a loadable C<IO::String> module.
2074
2075If you feel strongly about disabling it, set this variable to
2076C<false>. Note that you will then need C<perlio> support from
2077your perl to be able to write stringified archives.
2078
2079Don't change this variable unless you B<really> know what you're
2080doing.
2081
d33cd7cf
CBW
2082=head2 $Archive::Tar::ZERO_PAD_NUMBERS
2083
2084This variable holds a boolean indicating if we will create
93e94d8a 2085zero padded numbers for C<size>, C<mtime> and C<checksum>.
d33cd7cf
CBW
2086The default is C<0>, indicating that we will create space padded
2087numbers. Added for compatibility with C<busybox> implementations.
2088
501bd44a
CBW
2089=head2 Tuning the way RESOLVE_SYMLINK will works
2090
2091 You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable,
2092 or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar.
2093
2094 Values can be one of the following:
2095
2096 none
2097 Disable this mechanism and failed as it was in previous version (<1.88)
2098
2099 speed (default)
2100 If you prefer speed
2101 this will read again the whole archive using read() so all entries
2102 will be available
2103
2104 memory
2105 If you prefer memory
2106
2107 Limitation
2108
2109 It won't work for terminal, pipe or sockets or every non seekable source.
2110
2111=cut
2112
39713df4
RGS
2113=head1 FAQ
2114
2115=over 4
2116
2117=item What's the minimum perl version required to run Archive::Tar?
2118
2119You will need perl version 5.005_03 or newer.
2120
2121=item Isn't Archive::Tar slow?
2122
2123Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
2124However, it's very portable. If speed is an issue, consider using
2125C</bin/tar> instead.
2126
2127=item Isn't Archive::Tar heavier on memory than /bin/tar?
2128
2129Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
2130C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
2131choice but to read the archive into memory.
2132This is ok if you want to do in-memory manipulation of the archive.
642eb381 2133
39713df4
RGS
2134If you just want to extract, use the C<extract_archive> class method
2135instead. It will optimize and write to disk immediately.
2136
642eb381
SH
2137Another option is to use the C<iter> class method to iterate over
2138the files in the tarball without reading them all in memory at once.
2139
2140=item Can you lazy-load data instead?
39713df4 2141
642eb381
SH
2142In some cases, yes. You can use the C<iter> class method to iterate
2143over the files in the tarball without reading them all in memory at once.
39713df4
RGS
2144
2145=item How much memory will an X kb tar file need?
2146
2147Probably more than X kb, since it will all be read into memory. If
2148this is a problem, and you don't need to do in memory manipulation
e0d68803 2149of the archive, consider using the C<iter> class method, or C</bin/tar>
642eb381 2150instead.
39713df4
RGS
2151
2152=item What do you do with unsupported filetypes in an archive?
2153
2154C<Unix> has a few filetypes that aren't supported on other platforms,
2155like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
2156try to make a copy of the original file, rather than throwing an error.
2157
2158This does require you to read the entire archive in to memory first,
2159since otherwise we wouldn't know what data to fill the copy with.
e0d68803
JB
2160(This means that you cannot use the class methods, including C<iter>
2161on archives that have incompatible filetypes and still expect things
642eb381 2162to work).
39713df4
RGS
2163
2164For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
2165the extraction of this particular item didn't work.
2166
f38c1908
SP
2167=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
2168
2169By default, C<Archive::Tar> is in a completely POSIX-compatible
2170mode, which uses the POSIX-specification of C<tar> to store files.
eadbb00b 2171For paths greater than 100 characters, this is done using the
f38c1908
SP
2172C<POSIX header prefix>. Non-POSIX-compatible clients may not support
2173this part of the specification, and may only support the C<GNU Extended
2174Header> functionality. To facilitate those clients, you can set the
e0d68803 2175C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
f38c1908
SP
2176C<GLOBAL VARIABLES> section for details on this variable.
2177
c3745331
RGS
2178Note that GNU tar earlier than version 1.14 does not cope well with
2179the C<POSIX header prefix>. If you use such a version, consider setting
2180the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
2181
b30bcf62
RGS
2182=item How do I extract only files that have property X from an archive?
2183
2184Sometimes, you might not wish to extract a complete archive, just
2185the files that are relevant to you, based on some criteria.
2186
2187You can do this by filtering a list of C<Archive::Tar::File> objects
2188based on your criteria. For example, to extract only files that have
2189the string C<foo> in their title, you would use:
2190
e0d68803 2191 $tar->extract(
b30bcf62 2192 grep { $_->full_path =~ /foo/ } $tar->get_files
e0d68803 2193 );
b30bcf62
RGS
2194
2195This way, you can filter on any attribute of the files in the archive.
2196Consult the C<Archive::Tar::File> documentation on how to use these
2197objects.
2198
81a5970e
RGS
2199=item How do I access .tar.Z files?
2200
2201The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
2202the C<IO::Zlib> module) to access tar files that have been compressed
2203with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
2204utility cannot be read by C<Compress::Zlib> and so cannot be directly
2205accesses by C<Archive::Tar>.
2206
2207If the C<uncompress> or C<gunzip> programs are available, you can use
2208one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
2209
2210Firstly with C<uncompress>
2211
2212 use Archive::Tar;
2213
2214 open F, "uncompress -c $filename |";
2215 my $tar = Archive::Tar->new(*F);
2216 ...
2217
2218and this with C<gunzip>
2219
2220 use Archive::Tar;
2221
2222 open F, "gunzip -c $filename |";
2223 my $tar = Archive::Tar->new(*F);
2224 ...
2225
2226Similarly, if the C<compress> program is available, you can use this to
2227write a C<.tar.Z> file
2228
2229 use Archive::Tar;
2230 use IO::File;
2231
2232 my $fh = new IO::File "| compress -c >$filename";
2233 my $tar = Archive::Tar->new();
2234 ...
2235 $tar->write($fh);
2236 $fh->close ;
2237
01d11a1c
SP
2238=item How do I handle Unicode strings?
2239
2240C<Archive::Tar> uses byte semantics for any files it reads from or writes
2241to disk. This is not a problem if you only deal with files and never
2242look at their content or work solely with byte strings. But if you use
2243Unicode strings with character semantics, some additional steps need
2244to be taken.
2245
2246For example, if you add a Unicode string like
2247
2248 # Problem
2249 $tar->add_data('file.txt', "Euro: \x{20AC}");
2250
2251then there will be a problem later when the tarfile gets written out
2252to disk via C<$tar->write()>:
2253
2254 Wide character in print at .../Archive/Tar.pm line 1014.
2255
2256The data was added as a Unicode string and when writing it out to disk,
2257the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
2258tried to convert the string to ISO-8859 and failed. The written file
2259now contains garbage.
2260
2261For this reason, Unicode strings need to be converted to UTF-8-encoded
2262bytestrings before they are handed off to C<add_data()>:
2263
2264 use Encode;
2265 my $data = "Accented character: \x{20AC}";
2266 $data = encode('utf8', $data);
2267
2268 $tar->add_data('file.txt', $data);
2269
e0d68803 2270A opposite problem occurs if you extract a UTF8-encoded file from a
01d11a1c
SP
2271tarball. Using C<get_content()> on the C<Archive::Tar::File> object
2272will return its content as a bytestring, not as a Unicode string.
2273
2274If you want it to be a Unicode string (because you want character
2275semantics with operations like regular expression matching), you need
e0d68803 2276to decode the UTF8-encoded content and have Perl convert it into
01d11a1c
SP
2277a Unicode string:
2278
2279 use Encode;
2280 my $data = $tar->get_content();
e0d68803 2281
01d11a1c
SP
2282 # Make it a Unicode string
2283 $data = decode('utf8', $data);
2284
e0d68803 2285There is no easy way to provide this functionality in C<Archive::Tar>,
01d11a1c
SP
2286because a tarball can contain many files, and each of which could be
2287encoded in a different way.
81a5970e 2288
39713df4
RGS
2289=back
2290
f475b4a2
JB
2291=head1 CAVEATS
2292
93e94d8a 2293The AIX tar does not fill all unused space in the tar archive with 0x00.
f475b4a2
JB
2294This sometimes leads to warning messages from C<Archive::Tar>.
2295
2296 Invalid header block at offset nnn
2297
2298A fix for that problem is scheduled to be released in the following levels
2299of AIX, all of which should be coming out in the 4th quarter of 2009:
2300
2301 AIX 5.3 TL7 SP10
2302 AIX 5.3 TL8 SP8
2303 AIX 5.3 TL9 SP5
2304 AIX 5.3 TL10 SP2
93e94d8a 2305
f475b4a2
JB
2306 AIX 6.1 TL0 SP11
2307 AIX 6.1 TL1 SP7
2308 AIX 6.1 TL2 SP6
2309 AIX 6.1 TL3 SP3
2310
93e94d8a
CBW
2311The IBM APAR number for this problem is IZ50240 (Reported component ID:
23125765G0300 / AIX 5.3). It is possible to get an ifix for that problem.
f475b4a2
JB
2313If you need an ifix please contact your local IBM AIX support.
2314
39713df4
RGS
2315=head1 TODO
2316
2317=over 4
2318
2319=item Check if passed in handles are open for read/write
2320
2321Currently I don't know of any portable pure perl way to do this.
2322Suggestions welcome.
2323
b3200c5d
SP
2324=item Allow archives to be passed in as string
2325
2326Currently, we only allow opened filehandles or filenames, but
2327not strings. The internals would need some reworking to facilitate
2328stringified archives.
2329
2330=item Facilitate processing an opened filehandle of a compressed archive
2331
2332Currently, we only support this if the filehandle is an IO::Zlib object.
2333Environments, like apache, will present you with an opened filehandle
2334to an uploaded file, which might be a compressed archive.
2335
39713df4
RGS
2336=back
2337
f38c1908
SP
2338=head1 SEE ALSO
2339
2340=over 4
2341
2342=item The GNU tar specification
2343
2344C<http://www.gnu.org/software/tar/manual/tar.html>
2345
eadbb00b 2346=item The PAX format specification
f38c1908 2347
eadbb00b 2348The specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
f38c1908
SP
2349
2350=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>
2351
2352=item GNU tar intends to switch to POSIX compatibility
2353
2354GNU Tar authors have expressed their intention to become completely
2355POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html>
2356
2357=item A Comparison between various tar implementations
2358
2359Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs>
2360
2361=back
2362
39713df4
RGS
2363=head1 AUTHOR
2364
c3745331
RGS
2365This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
2366
2367Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
39713df4
RGS
2368
2369=head1 ACKNOWLEDGEMENTS
2370
f475b4a2
JB
2371Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas,
2372Rainer Tammer and especially Andrew Savige for their help and suggestions.
39713df4
RGS
2373
2374=head1 COPYRIGHT
2375
f475b4a2 2376This module is copyright (c) 2002 - 2009 Jos Boumans
c3745331 2377E<lt>kane@cpan.orgE<gt>. All rights reserved.
39713df4 2378
e0d68803 2379This library is free software; you may redistribute and/or modify
c3745331 2380it under the same terms as Perl itself.
39713df4
RGS
2381
2382=cut