This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Archive-Tar to CPAN version 1.76
[perl5.git] / cpan / Archive-Tar / lib / Archive / Tar.pm
CommitLineData
39713df4 1### the gnu tar specification:
f38c1908 2### http://www.gnu.org/software/tar/manual/tar.html
39713df4
RGS
3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
642eb381
SH
10use Cwd;
11use IO::Zlib;
12use IO::File;
13use Carp qw(carp croak);
14use File::Spec ();
15use File::Spec::Unix ();
16use File::Path ();
17
18use Archive::Tar::File;
19use Archive::Tar::Constant;
20
21require Exporter;
22
39713df4
RGS
23use strict;
24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
1c82faa7 25 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS
d33cd7cf 26 $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT
178aef9a
RGS
27 ];
28
642eb381 29@ISA = qw[Exporter];
bef46b70 30@EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP ];
178aef9a
RGS
31$DEBUG = 0;
32$WARN = 1;
33$FOLLOW_SYMLINK = 0;
89ae55b4 34$VERSION = "1.76";
178aef9a
RGS
35$CHOWN = 1;
36$CHMOD = 1;
1c82faa7 37$SAME_PERMISSIONS = $> == 0 ? 1 : 0;
178aef9a
RGS
38$DO_NOT_USE_PREFIX = 0;
39$INSECURE_EXTRACT_MODE = 0;
d33cd7cf 40$ZERO_PAD_NUMBERS = 0;
39713df4
RGS
41
42BEGIN {
43 use Config;
44 $HAS_PERLIO = $Config::Config{useperlio};
45
46 ### try and load IO::String anyway, so you can dynamically
47 ### switch between perlio and IO::String
642eb381 48 $HAS_IO_STRING = eval {
39713df4
RGS
49 require IO::String;
50 import IO::String;
642eb381
SH
51 1;
52 } || 0;
39713df4
RGS
53}
54
39713df4
RGS
55=head1 NAME
56
57Archive::Tar - module for manipulations of tar archives
58
59=head1 SYNOPSIS
60
61 use Archive::Tar;
62 my $tar = Archive::Tar->new;
63
642eb381 64 $tar->read('origin.tgz');
39713df4
RGS
65 $tar->extract();
66
67 $tar->add_files('file/foo.pl', 'docs/README');
68 $tar->add_data('file/baz.txt', 'This is the contents now');
69
70 $tar->rename('oldname', 'new/file/name');
71
642eb381 72 $tar->write('files.tar'); # plain tar
bef46b70
JB
73 $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed
74 $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed
39713df4
RGS
75
76=head1 DESCRIPTION
77
78Archive::Tar provides an object oriented mechanism for handling tar
79files. It provides class methods for quick and easy files handling
80while also allowing for the creation of tar file objects for custom
81manipulation. If you have the IO::Zlib module installed,
82Archive::Tar will also support compressed or gzipped tar files.
83
84An object of class Archive::Tar represents a .tar(.gz) archive full
85of files and things.
86
87=head1 Object Methods
88
89=head2 Archive::Tar->new( [$file, $compressed] )
90
91Returns a new Tar object. If given any arguments, C<new()> calls the
92C<read()> method automatically, passing on the arguments provided to
93the C<read()> method.
94
95If C<new()> is invoked with arguments and the C<read()> method fails
96for any reason, C<new()> returns undef.
97
98=cut
99
100my $tmpl = {
101 _data => [ ],
102 _file => 'Unknown',
103};
104
105### install get/set accessors for this object.
106for my $key ( keys %$tmpl ) {
107 no strict 'refs';
108 *{__PACKAGE__."::$key"} = sub {
109 my $self = shift;
110 $self->{$key} = $_[0] if @_;
111 return $self->{$key};
112 }
113}
114
115sub new {
116 my $class = shift;
117 $class = ref $class if ref $class;
118
119 ### copying $tmpl here since a shallow copy makes it use the
120 ### same aref, causing for files to remain in memory always.
941cb2bb 121 my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class;
39713df4
RGS
122
123 if (@_) {
81a5970e
RGS
124 unless ( $obj->read( @_ ) ) {
125 $obj->_error(qq[No data could be read from file]);
126 return;
127 }
39713df4
RGS
128 }
129
130 return $obj;
131}
132
642eb381 133=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] )
39713df4
RGS
134
135Read the given tar file into memory.
136The first argument can either be the name of a file or a reference to
137an already open filehandle (or an IO::Zlib object if it's compressed)
39713df4
RGS
138
139The C<read> will I<replace> any previous content in C<$tar>!
140
e0d68803 141The second argument may be considered optional, but remains for
642eb381
SH
142backwards compatibility. Archive::Tar now looks at the file
143magic to determine what class should be used to open the file
144and will transparently Do The Right Thing.
145
146Archive::Tar will warn if you try to pass a bzip2 compressed file and the
147IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return.
39713df4 148
b3200c5d 149Note that you can currently B<not> pass a C<gzip> compressed
642eb381
SH
150filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed
151filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string
b3200c5d
SP
152containing the full archive information (either compressed or
153uncompressed). These are worth while features, but not currently
154implemented. See the C<TODO> section.
155
39713df4
RGS
156The third argument can be a hash reference with options. Note that
157all options are case-sensitive.
158
159=over 4
160
161=item limit
162
163Do not read more than C<limit> files. This is useful if you have
164very big archives, and are only interested in the first few files.
165
642eb381
SH
166=item filter
167
168Can be set to a regular expression. Only files with names that match
169the expression will be read.
170
39713df4
RGS
171=item extract
172
173If set to true, immediately extract entries when reading them. This
174gives you the same memory break as the C<extract_archive> function.
175Note however that entries will not be read into memory, but written
e0d68803 176straight to disk. This means no C<Archive::Tar::File> objects are
642eb381 177created for you to inspect.
39713df4
RGS
178
179=back
180
181All files are stored internally as C<Archive::Tar::File> objects.
182Please consult the L<Archive::Tar::File> documentation for details.
183
184Returns the number of files read in scalar context, and a list of
185C<Archive::Tar::File> objects in list context.
186
187=cut
188
189sub read {
190 my $self = shift;
191 my $file = shift;
192 my $gzip = shift || 0;
193 my $opts = shift || {};
194
195 unless( defined $file ) {
196 $self->_error( qq[No file to read from!] );
197 return;
198 } else {
199 $self->_file( $file );
200 }
201
202 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
203 or return;
204
205 my $data = $self->_read_tar( $handle, $opts ) or return;
206
207 $self->_data( $data );
208
209 return wantarray ? @$data : scalar @$data;
210}
211
212sub _get_handle {
642eb381
SH
213 my $self = shift;
214 my $file = shift; return unless defined $file;
642eb381
SH
215 my $compress = shift || 0;
216 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only
217
afabe0e8
CBW
218 ### Check if file is a file handle or IO glob
219 if ( ref $file ) {
220 return $file if eval{ *$file{IO} };
221 return $file if eval{ $file->isa(q{IO::Handle}) };
222 $file = q{}.$file;
223 }
642eb381
SH
224
225 ### get a FH opened to the right class, so we can use it transparently
226 ### throughout the program
227 my $fh;
228 { ### reading magic only makes sense if we're opening a file for
229 ### reading. otherwise, just use what the user requested.
230 my $magic = '';
231 if( MODE_READ->($mode) ) {
232 open my $tmp, $file or do {
233 $self->_error( qq[Could not open '$file' for reading: $!] );
234 return;
235 };
e0d68803 236
642eb381
SH
237 ### read the first 4 bites of the file to figure out which class to
238 ### use to open the file.
e0d68803 239 sysread( $tmp, $magic, 4 );
642eb381
SH
240 close $tmp;
241 }
39713df4 242
642eb381
SH
243 ### is it bzip?
244 ### if you asked specifically for bzip compression, or if we're in
245 ### read mode and the magic numbers add up, use bzip
246 if( BZIP and (
e0d68803 247 ($compress eq COMPRESS_BZIP) or
642eb381
SH
248 ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM )
249 )
250 ) {
e0d68803 251
642eb381
SH
252 ### different reader/writer modules, different error vars... sigh
253 if( MODE_READ->($mode) ) {
254 $fh = IO::Uncompress::Bunzip2->new( $file ) or do {
255 $self->_error( qq[Could not read '$file': ] .
256 $IO::Uncompress::Bunzip2::Bunzip2Error
257 );
258 return;
259 };
e0d68803 260
642eb381
SH
261 } else {
262 $fh = IO::Compress::Bzip2->new( $file ) or do {
263 $self->_error( qq[Could not write to '$file': ] .
264 $IO::Compress::Bzip2::Bzip2Error
265 );
266 return;
267 };
268 }
e0d68803 269
642eb381
SH
270 ### is it gzip?
271 ### if you asked for compression, if you wanted to read or the gzip
272 ### magic number is present (redundant with read)
273 } elsif( ZLIB and (
274 $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM
e0d68803 275 )
642eb381
SH
276 ) {
277 $fh = IO::Zlib->new;
39713df4 278
642eb381
SH
279 unless( $fh->open( $file, $mode ) ) {
280 $self->_error(qq[Could not create filehandle for '$file': $!]);
281 return;
282 }
e0d68803 283
642eb381 284 ### is it plain tar?
39713df4 285 } else {
642eb381 286 $fh = IO::File->new;
39713df4 287
642eb381
SH
288 unless( $fh->open( $file, $mode ) ) {
289 $self->_error(qq[Could not create filehandle for '$file': $!]);
290 return;
291 }
39713df4 292
642eb381
SH
293 ### enable bin mode on tar archives
294 binmode $fh;
e0d68803 295 }
642eb381 296 }
39713df4
RGS
297
298 return $fh;
299}
300
642eb381 301
39713df4
RGS
302sub _read_tar {
303 my $self = shift;
304 my $handle = shift or return;
305 my $opts = shift || {};
306
307 my $count = $opts->{limit} || 0;
642eb381 308 my $filter = $opts->{filter};
afabe0e8 309 my $filter_cb = $opts->{filter_cb};
39713df4
RGS
310 my $extract = $opts->{extract} || 0;
311
312 ### set a cap on the amount of files to extract ###
313 my $limit = 0;
314 $limit = 1 if $count > 0;
315
316 my $tarfile = [ ];
317 my $chunk;
318 my $read = 0;
319 my $real_name; # to set the name of a file when
320 # we're encountering @longlink
321 my $data;
322
323 LOOP:
324 while( $handle->read( $chunk, HEAD ) ) {
325 ### IO::Zlib doesn't support this yet
326 my $offset = eval { tell $handle } || 'unknown';
d33cd7cf 327 $@ = '';
39713df4
RGS
328
329 unless( $read++ ) {
330 my $gzip = GZIP_MAGIC_NUM;
331 if( $chunk =~ /$gzip/ ) {
332 $self->_error( qq[Cannot read compressed format in tar-mode] );
333 return;
334 }
4feb3b72
JB
335
336 ### size is < HEAD, which means a corrupted file, as the minimum
337 ### length is _at least_ HEAD
338 if (length $chunk != HEAD) {
339 $self->_error( qq[Cannot read enough bytes from the tarfile] );
340 return;
341 }
39713df4
RGS
342 }
343
344 ### if we can't read in all bytes... ###
345 last if length $chunk != HEAD;
346
347 ### Apparently this should really be two blocks of 512 zeroes,
348 ### but GNU tar sometimes gets it wrong. See comment in the
349 ### source code (tar.c) to GNU cpio.
350 next if $chunk eq TAR_END;
351
b30bcf62
RGS
352 ### according to the posix spec, the last 12 bytes of the header are
353 ### null bytes, to pad it to a 512 byte block. That means if these
eadbb00b 354 ### bytes are NOT null bytes, it's a corrupt header. See:
b30bcf62
RGS
355 ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
356 ### line 111
357 { my $nulls = join '', "\0" x 12;
358 unless( $nulls eq substr( $chunk, 500, 12 ) ) {
359 $self->_error( qq[Invalid header block at offset $offset] );
360 next LOOP;
361 }
362 }
363
81a5970e
RGS
364 ### pass the realname, so we can set it 'proper' right away
365 ### some of the heuristics are done on the name, so important
366 ### to set it ASAP
39713df4 367 my $entry;
81a5970e
RGS
368 { my %extra_args = ();
369 $extra_args{'name'} = $$real_name if defined $real_name;
e0d68803
JB
370
371 unless( $entry = Archive::Tar::File->new( chunk => $chunk,
372 %extra_args )
81a5970e
RGS
373 ) {
374 $self->_error( qq[Couldn't read chunk at offset $offset] );
b30bcf62 375 next LOOP;
81a5970e 376 }
39713df4
RGS
377 }
378
379 ### ignore labels:
d33cd7cf 380 ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159
39713df4
RGS
381 next if $entry->is_label;
382
383 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
384
385 if ( $entry->is_file && !$entry->validate ) {
386 ### sometimes the chunk is rather fux0r3d and a whole 512
c3745331 387 ### bytes ends up in the ->name area.
39713df4
RGS
388 ### clean it up, if need be
389 my $name = $entry->name;
390 $name = substr($name, 0, 100) if length $name > 100;
391 $name =~ s/\n/ /g;
392
393 $self->_error( $name . qq[: checksum error] );
394 next LOOP;
395 }
396
397 my $block = BLOCK_SIZE->( $entry->size );
398
399 $data = $entry->get_content_by_ref;
400
afabe0e8
CBW
401 my $skip = 0;
402 ### skip this entry if we're filtering
403 if ($filter && $entry->name !~ $filter) {
404 $skip = 1;
405
406 ### skip this entry if it's a pax header. This is a special file added
407 ### by, among others, git-generated tarballs. It holds comments and is
408 ### not meant for extracting. See #38932: pax_global_header extracted
c3e6accb 409 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
afabe0e8
CBW
410 $skip = 2;
411 } elsif ($filter_cb && ! $filter_cb->($entry)) {
412 $skip = 3;
413 }
414
415 if ($skip) {
416 #
417 # Since we're skipping, do not allocate memory for the
418 # whole file. Read it 64 BLOCKS at a time. Do not
419 # complete the skip yet because maybe what we read is a
420 # longlink and it won't get skipped after all
421 #
422 my $amt = $block;
423 while ($amt > 0) {
424 $$data = '';
425 my $this = 64 * BLOCK;
426 $this = $amt if $this > $amt;
427 if( $handle->read( $$data, $this ) < $this ) {
428 $self->_error( qq[Read error on tarfile (missing data) '].
429 $entry->full_path ."' at offset $offset" );
430 next LOOP;
431 }
432 $amt -= $this;
433 }
434 ### throw away trailing garbage ###
435 substr ($$data, $entry->size) = "" if defined $$data && $block < 64 * BLOCK;
436 } else {
437
438 ### just read everything into memory
439 ### can't do lazy loading since IO::Zlib doesn't support 'seek'
440 ### this is because Compress::Zlib doesn't support it =/
441 ### this reads in the whole data in one read() call.
442 if ( $handle->read( $$data, $block ) < $block ) {
443 $self->_error( qq[Read error on tarfile (missing data) '].
39713df4 444 $entry->full_path ."' at offset $offset" );
afabe0e8
CBW
445 next LOOP;
446 }
447 ### throw away trailing garbage ###
448 substr ($$data, $entry->size) = "" if defined $$data;
39713df4
RGS
449 }
450
39713df4
RGS
451 ### part II of the @LongLink munging -- need to do /after/
452 ### the checksum check.
453 if( $entry->is_longlink ) {
454 ### weird thing in tarfiles -- if the file is actually a
455 ### @LongLink, the data part seems to have a trailing ^@
456 ### (unprintable) char. to display, pipe output through less.
457 ### but that doesn't *always* happen.. so check if the last
458 ### character is a control character, and if so remove it
459 ### at any rate, we better remove that character here, or tests
460 ### like 'eq' and hashlook ups based on names will SO not work
461 ### remove it by calculating the proper size, and then
462 ### tossing out everything that's longer than that size.
463
464 ### count number of nulls
465 my $nulls = $$data =~ tr/\0/\0/;
466
467 ### cut data + size by that many bytes
468 $entry->size( $entry->size - $nulls );
469 substr ($$data, $entry->size) = "";
470 }
471 }
472
473 ### clean up of the entries.. posix tar /apparently/ has some
474 ### weird 'feature' that allows for filenames > 255 characters
475 ### they'll put a header in with as name '././@LongLink' and the
476 ### contents will be the name of the /next/ file in the archive
477 ### pretty crappy and kludgy if you ask me
478
479 ### set the name for the next entry if this is a @LongLink;
480 ### this is one ugly hack =/ but needed for direct extraction
481 if( $entry->is_longlink ) {
482 $real_name = $data;
b30bcf62 483 next LOOP;
39713df4
RGS
484 } elsif ( defined $real_name ) {
485 $entry->name( $$real_name );
486 $entry->prefix('');
487 undef $real_name;
488 }
489
afabe0e8
CBW
490 if ($filter && $entry->name !~ $filter) {
491 next LOOP;
e0d68803 492
afabe0e8
CBW
493 ### skip this entry if it's a pax header. This is a special file added
494 ### by, among others, git-generated tarballs. It holds comments and is
495 ### not meant for extracting. See #38932: pax_global_header extracted
c3e6accb 496 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
afabe0e8
CBW
497 next LOOP;
498 } elsif ($filter_cb && ! $filter_cb->($entry)) {
499 next LOOP;
500 }
e0d68803 501
d33cd7cf
CBW
502 if ( $extract && !$entry->is_longlink
503 && !$entry->is_unknown
504 && !$entry->is_label ) {
505 $self->_extract_file( $entry ) or return;
506 }
39713df4
RGS
507
508 ### Guard against tarfiles with garbage at the end
509 last LOOP if $entry->name eq '';
510
511 ### push only the name on the rv if we're extracting
512 ### -- for extract_archive
513 push @$tarfile, ($extract ? $entry->name : $entry);
514
515 if( $limit ) {
516 $count-- unless $entry->is_longlink || $entry->is_dir;
517 last LOOP unless $count;
518 }
519 } continue {
520 undef $data;
521 }
522
523 return $tarfile;
524}
525
526=head2 $tar->contains_file( $filename )
527
528Check if the archive contains a certain file.
529It will return true if the file is in the archive, false otherwise.
530
531Note however, that this function does an exact match using C<eq>
532on the full path. So it cannot compensate for case-insensitive file-
533systems or compare 2 paths to see if they would point to the same
534underlying file.
535
536=cut
537
538sub contains_file {
539 my $self = shift;
01d11a1c 540 my $full = shift;
e0d68803 541
01d11a1c 542 return unless defined $full;
39713df4 543
c3745331
RGS
544 ### don't warn if the entry isn't there.. that's what this function
545 ### is for after all.
546 local $WARN = 0;
39713df4
RGS
547 return 1 if $self->_find_entry($full);
548 return;
549}
550
551=head2 $tar->extract( [@filenames] )
552
553Write files whose names are equivalent to any of the names in
554C<@filenames> to disk, creating subdirectories as necessary. This
555might not work too well under VMS.
556Under MacPerl, the file's modification time will be converted to the
557MacOS zero of time, and appropriate conversions will be done to the
558path. However, the length of each element of the path is not
559inspected to see whether it's longer than MacOS currently allows (32
560characters).
561
562If C<extract> is called without a list of file names, the entire
563contents of the archive are extracted.
564
565Returns a list of filenames extracted.
566
567=cut
568
569sub extract {
570 my $self = shift;
b30bcf62 571 my @args = @_;
39713df4
RGS
572 my @files;
573
f38c1908
SP
574 # use the speed optimization for all extracted files
575 local($self->{cwd}) = cwd() unless $self->{cwd};
576
eadbb00b 577 ### you requested the extraction of only certain files
b30bcf62
RGS
578 if( @args ) {
579 for my $file ( @args ) {
e0d68803 580
b30bcf62
RGS
581 ### it's already an object?
582 if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
583 push @files, $file;
584 next;
39713df4 585
b30bcf62
RGS
586 ### go find it then
587 } else {
e0d68803 588
b30bcf62
RGS
589 my $found;
590 for my $entry ( @{$self->_data} ) {
591 next unless $file eq $entry->full_path;
e0d68803 592
b30bcf62
RGS
593 ### we found the file you're looking for
594 push @files, $entry;
595 $found++;
596 }
e0d68803 597
b30bcf62 598 unless( $found ) {
e0d68803 599 return $self->_error(
b30bcf62
RGS
600 qq[Could not find '$file' in archive] );
601 }
39713df4
RGS
602 }
603 }
604
605 ### just grab all the file items
606 } else {
607 @files = $self->get_files;
608 }
609
610 ### nothing found? that's an error
611 unless( scalar @files ) {
612 $self->_error( qq[No files found for ] . $self->_file );
613 return;
614 }
615
616 ### now extract them
617 for my $entry ( @files ) {
618 unless( $self->_extract_file( $entry ) ) {
619 $self->_error(q[Could not extract ']. $entry->full_path .q['] );
620 return;
621 }
622 }
623
624 return @files;
625}
626
627=head2 $tar->extract_file( $file, [$extract_path] )
628
629Write an entry, whose name is equivalent to the file name provided to
48e76d2d 630disk. Optionally takes a second parameter, which is the full native
39713df4
RGS
631path (including filename) the entry will be written to.
632
633For example:
634
635 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
636
b30bcf62
RGS
637 $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' );
638
39713df4
RGS
639Returns true on success, false on failure.
640
641=cut
642
643sub extract_file {
644 my $self = shift;
01d11a1c 645 my $file = shift; return unless defined $file;
39713df4
RGS
646 my $alt = shift;
647
648 my $entry = $self->_find_entry( $file )
649 or $self->_error( qq[Could not find an entry for '$file'] ), return;
650
651 return $self->_extract_file( $entry, $alt );
652}
653
654sub _extract_file {
655 my $self = shift;
656 my $entry = shift or return;
657 my $alt = shift;
39713df4
RGS
658
659 ### you wanted an alternate extraction location ###
660 my $name = defined $alt ? $alt : $entry->full_path;
661
662 ### splitpath takes a bool at the end to indicate
663 ### that it's splitting a dir
7f10f74b
SH
664 my ($vol,$dirs,$file);
665 if ( defined $alt ) { # It's a local-OS path
666 ($vol,$dirs,$file) = File::Spec->splitpath( $alt,
667 $entry->is_dir );
668 } else {
669 ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
670 $entry->is_dir );
671 }
672
39713df4
RGS
673 my $dir;
674 ### is $name an absolute path? ###
642eb381 675 if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) {
178aef9a
RGS
676
677 ### absolute names are not allowed to be in tarballs under
678 ### strict mode, so only allow it if a user tells us to do it
679 if( not defined $alt and not $INSECURE_EXTRACT_MODE ) {
e0d68803 680 $self->_error(
178aef9a
RGS
681 q[Entry ']. $entry->full_path .q[' is an absolute path. ].
682 q[Not extracting absolute paths under SECURE EXTRACT MODE]
e0d68803 683 );
178aef9a
RGS
684 return;
685 }
e0d68803 686
178aef9a 687 ### user asked us to, it's fine.
642eb381 688 $dir = File::Spec->catpath( $vol, $dirs, "" );
39713df4
RGS
689
690 ### it's a relative path ###
691 } else {
e0d68803
JB
692 my $cwd = (ref $self and defined $self->{cwd})
693 ? $self->{cwd}
642eb381 694 : cwd();
f5afd28d 695
f5afd28d
NC
696 my @dirs = defined $alt
697 ? File::Spec->splitdir( $dirs ) # It's a local-OS path
698 : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely
699 # straight from the tarball
178aef9a 700
e0d68803
JB
701 if( not defined $alt and
702 not $INSECURE_EXTRACT_MODE
703 ) {
642eb381
SH
704
705 ### paths that leave the current directory are not allowed under
706 ### strict mode, so only allow it if a user tells us to do this.
707 if( grep { $_ eq '..' } @dirs ) {
e0d68803 708
642eb381
SH
709 $self->_error(
710 q[Entry ']. $entry->full_path .q[' is attempting to leave ].
711 q[the current working directory. Not extracting under ].
712 q[SECURE EXTRACT MODE]
713 );
714 return;
e0d68803
JB
715 }
716
642eb381
SH
717 ### the archive may be asking us to extract into a symlink. This
718 ### is not sane and a possible security issue, as outlined here:
719 ### https://rt.cpan.org/Ticket/Display.html?id=30380
720 ### https://bugzilla.redhat.com/show_bug.cgi?id=295021
721 ### https://issues.rpath.com/browse/RPL-1716
722 my $full_path = $cwd;
723 for my $d ( @dirs ) {
724 $full_path = File::Spec->catdir( $full_path, $d );
e0d68803 725
642eb381
SH
726 ### we've already checked this one, and it's safe. Move on.
727 next if ref $self and $self->{_link_cache}->{$full_path};
728
729 if( -l $full_path ) {
730 my $to = readlink $full_path;
731 my $diag = "symlinked directory ($full_path => $to)";
732
733 $self->_error(
734 q[Entry ']. $entry->full_path .q[' is attempting to ].
735 qq[extract to a $diag. This is considered a security ].
736 q[vulnerability and not allowed under SECURE EXTRACT ].
737 q[MODE]
738 );
739 return;
740 }
e0d68803 741
642eb381
SH
742 ### XXX keep a cache if possible, so the stats become cheaper:
743 $self->{_link_cache}->{$full_path} = 1 if ref $self;
744 }
745 }
746
2610e7a4
JB
747 ### '.' is the directory delimiter on VMS, which has to be escaped
748 ### or changed to '_' on vms. vmsify is used, because older versions
749 ### of vmspath do not handle this properly.
750 ### Must not add a '/' to an empty directory though.
e0d68803 751 map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS;
f5afd28d 752
e0d68803 753 my ($cwd_vol,$cwd_dir,$cwd_file)
48e76d2d
CB
754 = File::Spec->splitpath( $cwd );
755 my @cwd = File::Spec->splitdir( $cwd_dir );
756 push @cwd, $cwd_file if length $cwd_file;
81a5970e 757
eadbb00b 758 ### We need to pass '' as the last element to catpath. Craig Berry
f5afd28d 759 ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
e0d68803 760 ### The root problem is that splitpath on UNIX always returns the
f5afd28d
NC
761 ### final path element as a file even if it is a directory, and of
762 ### course there is no way it can know the difference without checking
763 ### against the filesystem, which it is documented as not doing. When
764 ### you turn around and call catpath, on VMS you have to know which bits
765 ### are directory bits and which bits are file bits. In this case we
766 ### know the result should be a directory. I had thought you could omit
767 ### the file argument to catpath in such a case, but apparently on UNIX
768 ### you can't.
e0d68803
JB
769 $dir = File::Spec->catpath(
770 $cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
f5afd28d
NC
771 );
772
e0d68803 773 ### catdir() returns undef if the path is longer than 255 chars on
2610e7a4 774 ### older VMS systems.
81a5970e
RGS
775 unless ( defined $dir ) {
776 $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
777 return;
778 }
779
39713df4
RGS
780 }
781
782 if( -e $dir && !-d _ ) {
783 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
784 return;
785 }
786
787 unless ( -d _ ) {
788 eval { File::Path::mkpath( $dir, 0, 0777 ) };
789 if( $@ ) {
642eb381
SH
790 my $fp = $entry->full_path;
791 $self->_error(qq[Could not create directory '$dir' for '$fp': $@]);
39713df4
RGS
792 return;
793 }
e0d68803 794
c3745331
RGS
795 ### XXX chown here? that might not be the same as in the archive
796 ### as we're only chown'ing to the owner of the file we're extracting
797 ### not to the owner of the directory itself, which may or may not
798 ### be another entry in the archive
799 ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
800 ### way to go.
801 #if( $CHOWN && CAN_CHOWN ) {
802 # chown $entry->uid, $entry->gid, $dir or
803 # $self->_error( qq[Could not set uid/gid on '$dir'] );
804 #}
39713df4
RGS
805 }
806
807 ### we're done if we just needed to create a dir ###
808 return 1 if $entry->is_dir;
809
810 my $full = File::Spec->catfile( $dir, $file );
811
812 if( $entry->is_unknown ) {
813 $self->_error( qq[Unknown file type for file '$full'] );
814 return;
815 }
816
817 if( length $entry->type && $entry->is_file ) {
818 my $fh = IO::File->new;
819 $fh->open( '>' . $full ) or (
820 $self->_error( qq[Could not open file '$full': $!] ),
821 return
822 );
823
824 if( $entry->size ) {
825 binmode $fh;
826 syswrite $fh, $entry->data or (
827 $self->_error( qq[Could not write data to '$full'] ),
828 return
829 );
830 }
831
832 close $fh or (
833 $self->_error( qq[Could not close file '$full'] ),
834 return
835 );
836
837 } else {
838 $self->_make_special_file( $entry, $full ) or return;
839 }
840
642eb381
SH
841 ### only update the timestamp if it's not a symlink; that will change the
842 ### timestamp of the original. This addresses bug #33669: Could not update
843 ### timestamp warning on symlinks
844 if( not -l $full ) {
845 utime time, $entry->mtime - TIME_OFFSET, $full or
846 $self->_error( qq[Could not update timestamp] );
847 }
39713df4 848
2610e7a4 849 if( $CHOWN && CAN_CHOWN->() ) {
39713df4
RGS
850 chown $entry->uid, $entry->gid, $full or
851 $self->_error( qq[Could not set uid/gid on '$full'] );
852 }
853
854 ### only chmod if we're allowed to, but never chmod symlinks, since they'll
855 ### change the perms on the file they're linking too...
856 if( $CHMOD and not -l $full ) {
1c82faa7
JB
857 my $mode = $entry->mode;
858 unless ($SAME_PERMISSIONS) {
859 $mode &= ~(oct(7000) | umask);
860 }
861 chmod $mode, $full or
39713df4
RGS
862 $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
863 }
864
865 return 1;
866}
867
868sub _make_special_file {
869 my $self = shift;
870 my $entry = shift or return;
871 my $file = shift; return unless defined $file;
872
873 my $err;
874
875 if( $entry->is_symlink ) {
876 my $fail;
877 if( ON_UNIX ) {
878 symlink( $entry->linkname, $file ) or $fail++;
879
880 } else {
881 $self->_extract_special_file_as_plain_file( $entry, $file )
882 or $fail++;
883 }
884
642eb381
SH
885 $err = qq[Making symbolic link '$file' to '] .
886 $entry->linkname .q[' failed] if $fail;
39713df4
RGS
887
888 } elsif ( $entry->is_hardlink ) {
889 my $fail;
890 if( ON_UNIX ) {
891 link( $entry->linkname, $file ) or $fail++;
892
893 } else {
894 $self->_extract_special_file_as_plain_file( $entry, $file )
895 or $fail++;
896 }
897
898 $err = qq[Making hard link from '] . $entry->linkname .
899 qq[' to '$file' failed] if $fail;
900
901 } elsif ( $entry->is_fifo ) {
902 ON_UNIX && !system('mknod', $file, 'p') or
903 $err = qq[Making fifo ']. $entry->name .qq[' failed];
904
905 } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
906 my $mode = $entry->is_blockdev ? 'b' : 'c';
907
908 ON_UNIX && !system('mknod', $file, $mode,
909 $entry->devmajor, $entry->devminor) or
910 $err = qq[Making block device ']. $entry->name .qq[' (maj=] .
911 $entry->devmajor . qq[ min=] . $entry->devminor .
912 qq[) failed.];
913
914 } elsif ( $entry->is_socket ) {
915 ### the original doesn't do anything special for sockets.... ###
916 1;
917 }
918
919 return $err ? $self->_error( $err ) : 1;
920}
921
922### don't know how to make symlinks, let's just extract the file as
923### a plain file
924sub _extract_special_file_as_plain_file {
925 my $self = shift;
926 my $entry = shift or return;
927 my $file = shift; return unless defined $file;
928
929 my $err;
930 TRY: {
931 my $orig = $self->_find_entry( $entry->linkname );
932
933 unless( $orig ) {
934 $err = qq[Could not find file '] . $entry->linkname .
935 qq[' in memory.];
936 last TRY;
937 }
938
939 ### clone the entry, make it appear as a normal file ###
940 my $clone = $entry->clone;
941 $clone->_downgrade_to_plainfile;
942 $self->_extract_file( $clone, $file ) or last TRY;
943
944 return 1;
945 }
946
947 return $self->_error($err);
948}
949
950=head2 $tar->list_files( [\@properties] )
951
952Returns a list of the names of all the files in the archive.
953
954If C<list_files()> is passed an array reference as its first argument
955it returns a list of hash references containing the requested
956properties of each file. The following list of properties is
957supported: name, size, mtime (last modified date), mode, uid, gid,
958linkname, uname, gname, devmajor, devminor, prefix.
959
960Passing an array reference containing only one element, 'name', is
961special cased to return a list of names rather than a list of hash
962references, making it equivalent to calling C<list_files> without
963arguments.
964
965=cut
966
967sub list_files {
968 my $self = shift;
969 my $aref = shift || [ ];
970
971 unless( $self->_data ) {
972 $self->read() or return;
973 }
974
975 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
976 return map { $_->full_path } @{$self->_data};
977 } else {
978
979 #my @rv;
980 #for my $obj ( @{$self->_data} ) {
981 # push @rv, { map { $_ => $obj->$_() } @$aref };
982 #}
983 #return @rv;
984
985 ### this does the same as the above.. just needs a +{ }
986 ### to make sure perl doesn't confuse it for a block
987 return map { my $o=$_;
988 +{ map { $_ => $o->$_() } @$aref }
989 } @{$self->_data};
990 }
991}
992
993sub _find_entry {
994 my $self = shift;
995 my $file = shift;
996
997 unless( defined $file ) {
998 $self->_error( qq[No file specified] );
999 return;
1000 }
1001
b30bcf62
RGS
1002 ### it's an object already
1003 return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
1004
39713df4
RGS
1005 for my $entry ( @{$self->_data} ) {
1006 my $path = $entry->full_path;
1007 return $entry if $path eq $file;
1008 }
1009
1010 $self->_error( qq[No such file in archive: '$file'] );
1011 return;
1012}
1013
1014=head2 $tar->get_files( [@filenames] )
1015
1016Returns the C<Archive::Tar::File> objects matching the filenames
1017provided. If no filename list was passed, all C<Archive::Tar::File>
1018objects in the current Tar object are returned.
1019
1020Please refer to the C<Archive::Tar::File> documentation on how to
1021handle these objects.
1022
1023=cut
1024
1025sub get_files {
1026 my $self = shift;
1027
1028 return @{ $self->_data } unless @_;
1029
1030 my @list;
1031 for my $file ( @_ ) {
1032 push @list, grep { defined } $self->_find_entry( $file );
1033 }
1034
1035 return @list;
1036}
1037
1038=head2 $tar->get_content( $file )
1039
1040Return the content of the named file.
1041
1042=cut
1043
1044sub get_content {
1045 my $self = shift;
1046 my $entry = $self->_find_entry( shift ) or return;
1047
1048 return $entry->data;
1049}
1050
1051=head2 $tar->replace_content( $file, $content )
1052
1053Make the string $content be the content for the file named $file.
1054
1055=cut
1056
1057sub replace_content {
1058 my $self = shift;
1059 my $entry = $self->_find_entry( shift ) or return;
1060
1061 return $entry->replace_content( shift );
1062}
1063
1064=head2 $tar->rename( $file, $new_name )
1065
1066Rename the file of the in-memory archive to $new_name.
1067
1068Note that you must specify a Unix path for $new_name, since per tar
1069standard, all files in the archive must be Unix paths.
1070
1071Returns true on success and false on failure.
1072
1073=cut
1074
1075sub rename {
1076 my $self = shift;
1077 my $file = shift; return unless defined $file;
1078 my $new = shift; return unless defined $new;
1079
1080 my $entry = $self->_find_entry( $file ) or return;
1081
1082 return $entry->rename( $new );
1083}
1084
1085=head2 $tar->remove (@filenamelist)
1086
1087Removes any entries with names matching any of the given filenames
1088from the in-memory archive. Returns a list of C<Archive::Tar::File>
1089objects that remain.
1090
1091=cut
1092
1093sub remove {
1094 my $self = shift;
1095 my @list = @_;
1096
1097 my %seen = map { $_->full_path => $_ } @{$self->_data};
1098 delete $seen{ $_ } for @list;
1099
1100 $self->_data( [values %seen] );
1101
1102 return values %seen;
1103}
1104
1105=head2 $tar->clear
1106
1107C<clear> clears the current in-memory archive. This effectively gives
1108you a 'blank' object, ready to be filled again. Note that C<clear>
1109only has effect on the object, not the underlying tarfile.
1110
1111=cut
1112
1113sub clear {
1114 my $self = shift or return;
1115
1116 $self->_data( [] );
1117 $self->_file( '' );
1118
1119 return 1;
1120}
1121
1122
1123=head2 $tar->write ( [$file, $compressed, $prefix] )
1124
1125Write the in-memory archive to disk. The first argument can either
1126be the name of a file or a reference to an already open filehandle (a
e0d68803 1127GLOB reference).
642eb381 1128
e0d68803 1129The second argument is used to indicate compression. You can either
642eb381 1130compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
e0d68803 1131to be the C<gzip> compression level (between 1 and 9), but the use of
eadbb00b 1132constants is preferred:
642eb381
SH
1133
1134 # write a gzip compressed file
bef46b70 1135 $tar->write( 'out.tgz', COMPRESS_GZIP );
642eb381 1136
e0d68803 1137 # write a bzip compressed file
bef46b70 1138 $tar->write( 'out.tbz', COMPRESS_BZIP );
39713df4
RGS
1139
1140Note that when you pass in a filehandle, the compression argument
1141is ignored, as all files are printed verbatim to your filehandle.
1142If you wish to enable compression with filehandles, use an
642eb381 1143C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
39713df4
RGS
1144
1145The third argument is an optional prefix. All files will be tucked
1146away in the directory you specify as prefix. So if you have files
1147'a' and 'b' in your archive, and you specify 'foo' as prefix, they
1148will be written to the archive as 'foo/a' and 'foo/b'.
1149
1150If no arguments are given, C<write> returns the entire formatted
1151archive as a string, which could be useful if you'd like to stuff the
1152archive into a socket or a pipe to gzip or something.
1153
642eb381 1154
39713df4
RGS
1155=cut
1156
1157sub write {
1158 my $self = shift;
1159 my $file = shift; $file = '' unless defined $file;
1160 my $gzip = shift || 0;
1161 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
1162 my $dummy = '';
e0d68803 1163
39713df4
RGS
1164 ### only need a handle if we have a file to print to ###
1165 my $handle = length($file)
1166 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
1167 or return )
1168 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h }
e0d68803 1169 : $HAS_IO_STRING ? IO::String->new
39713df4
RGS
1170 : __PACKAGE__->no_string_support();
1171
e0d68803
JB
1172 ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a
1173 ### corrupt TAR file. Must clear out $\ to make sure no garbage is
1174 ### printed to the archive
1175 local $\;
39713df4
RGS
1176
1177 for my $entry ( @{$self->_data} ) {
1178 ### entries to be written to the tarfile ###
1179 my @write_me;
1180
1181 ### only now will we change the object to reflect the current state
1182 ### of the name and prefix fields -- this needs to be limited to
1183 ### write() only!
1184 my $clone = $entry->clone;
1185
1186
e0d68803 1187 ### so, if you don't want use to use the prefix, we'll stuff
39713df4
RGS
1188 ### everything in the name field instead
1189 if( $DO_NOT_USE_PREFIX ) {
1190
1191 ### you might have an extended prefix, if so, set it in the clone
1192 ### XXX is ::Unix right?
1193 $clone->name( length $ext_prefix
1194 ? File::Spec::Unix->catdir( $ext_prefix,
1195 $clone->full_path)
1196 : $clone->full_path );
1197 $clone->prefix( '' );
1198
1199 ### otherwise, we'll have to set it properly -- prefix part in the
1200 ### prefix and name part in the name field.
1201 } else {
1202
1203 ### split them here, not before!
1204 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
1205
1206 ### you might have an extended prefix, if so, set it in the clone
1207 ### XXX is ::Unix right?
1208 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
1209 if length $ext_prefix;
1210
1211 $clone->prefix( $prefix );
1212 $clone->name( $name );
1213 }
1214
1215 ### names are too long, and will get truncated if we don't add a
1216 ### '@LongLink' file...
1217 my $make_longlink = ( length($clone->name) > NAME_LENGTH or
1218 length($clone->prefix) > PREFIX_LENGTH
1219 ) || 0;
1220
1221 ### perhaps we need to make a longlink file?
1222 if( $make_longlink ) {
1223 my $longlink = Archive::Tar::File->new(
1224 data => LONGLINK_NAME,
1225 $clone->full_path,
1226 { type => LONGLINK }
1227 );
1228
1229 unless( $longlink ) {
1230 $self->_error( qq[Could not create 'LongLink' entry for ] .
1231 qq[oversize file '] . $clone->full_path ."'" );
1232 return;
1233 };
1234
1235 push @write_me, $longlink;
1236 }
1237
1238 push @write_me, $clone;
1239
1240 ### write the one, optionally 2 a::t::file objects to the handle
1241 for my $clone (@write_me) {
1242
1243 ### if the file is a symlink, there are 2 options:
1244 ### either we leave the symlink intact, but then we don't write any
1245 ### data OR we follow the symlink, which means we actually make a
1246 ### copy. if we do the latter, we have to change the TYPE of the
1247 ### clone to 'FILE'
1248 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1249 my $data_ok = !$clone->is_symlink && $clone->has_content;
1250
1251 ### downgrade to a 'normal' file if it's a symlink we're going to
1252 ### treat as a regular file
1253 $clone->_downgrade_to_plainfile if $link_ok;
1254
1255 ### get the header for this block
1256 my $header = $self->_format_tar_entry( $clone );
1257 unless( $header ) {
1258 $self->_error(q[Could not format header for: ] .
1259 $clone->full_path );
1260 return;
1261 }
1262
1263 unless( print $handle $header ) {
1264 $self->_error(q[Could not write header for: ] .
1265 $clone->full_path);
1266 return;
1267 }
1268
1269 if( $link_ok or $data_ok ) {
1270 unless( print $handle $clone->data ) {
1271 $self->_error(q[Could not write data for: ] .
1272 $clone->full_path);
1273 return;
1274 }
1275
1276 ### pad the end of the clone if required ###
1277 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1278 }
1279
1280 } ### done writing these entries
1281 }
1282
1283 ### write the end markers ###
1284 print $handle TAR_END x 2 or
1285 return $self->_error( qq[Could not write tar end markers] );
b30bcf62 1286
39713df4 1287 ### did you want it written to a file, or returned as a string? ###
b30bcf62 1288 my $rv = length($file) ? 1
39713df4 1289 : $HAS_PERLIO ? $dummy
b30bcf62
RGS
1290 : do { seek $handle, 0, 0; local $/; <$handle> };
1291
d33cd7cf 1292 ### make sure to close the handle if we created it
afabe0e8
CBW
1293 if ( $file ne $handle ) {
1294 unless( close $handle ) {
1295 $self->_error( qq[Could not write tar] );
1296 return;
1297 }
1298 }
e0d68803 1299
b30bcf62 1300 return $rv;
39713df4
RGS
1301}
1302
1303sub _format_tar_entry {
1304 my $self = shift;
1305 my $entry = shift or return;
1306 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
1307 my $no_prefix = shift || 0;
1308
1309 my $file = $entry->name;
1310 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix;
1311
1312 ### remove the prefix from the file name
eadbb00b 1313 ### not sure if this is still needed --kane
39713df4
RGS
1314 ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1315 ### this for us. Even worse, this would break if we tried to add a file
1316 ### like x/x.
1317 #if( length $prefix ) {
1318 # $file =~ s/^$match//;
1319 #}
1320
1321 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1322 if length $ext_prefix;
1323
1324 ### not sure why this is... ###
1325 my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1326 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1327
d33cd7cf 1328 my $f1 = "%06o"; my $f2 = $ZERO_PAD_NUMBERS ? "%011o" : "%11o";
39713df4
RGS
1329
1330 ### this might be optimizable with a 'changed' flag in the file objects ###
1331 my $tar = pack (
1332 PACK,
1333 $file,
1334
1335 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1336 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1337
1338 "", # checksum field - space padded a bit down
1339
1340 (map { $entry->$_() } qw[type linkname magic]),
1341
1342 $entry->version || TAR_VERSION,
1343
1344 (map { $entry->$_() } qw[uname gname]),
1345 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1346
1347 ($no_prefix ? '' : $prefix)
1348 );
1349
1350 ### add the checksum ###
d33cd7cf 1351 my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0";
39713df4
RGS
1352 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1353
1354 return $tar;
1355}
1356
1357=head2 $tar->add_files( @filenamelist )
1358
1359Takes a list of filenames and adds them to the in-memory archive.
1360
1361The path to the file is automatically converted to a Unix like
1362equivalent for use in the archive, and, if on MacOS, the file's
1363modification time is converted from the MacOS epoch to the Unix epoch.
1364So tar archives created on MacOS with B<Archive::Tar> can be read
1365both with I<tar> on Unix and applications like I<suntar> or
1366I<Stuffit Expander> on MacOS.
1367
1368Be aware that the file's type/creator and resource fork will be lost,
1369which is usually what you want in cross-platform archives.
1370
2610e7a4
JB
1371Instead of a filename, you can also pass it an existing C<Archive::Tar::File>
1372object from, for example, another archive. The object will be clone, and
1373effectively be a copy of the original, not an alias.
1374
39713df4
RGS
1375Returns a list of C<Archive::Tar::File> objects that were just added.
1376
1377=cut
1378
1379sub add_files {
1380 my $self = shift;
1381 my @files = @_ or return;
1382
1383 my @rv;
1384 for my $file ( @files ) {
2610e7a4
JB
1385
1386 ### you passed an Archive::Tar::File object
1387 ### clone it so we don't accidentally have a reference to
1388 ### an object from another archive
1389 if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) {
e0d68803 1390 push @rv, $file->clone;
2610e7a4
JB
1391 next;
1392 }
e0d68803 1393
c3745331 1394 unless( -e $file || -l $file ) {
39713df4
RGS
1395 $self->_error( qq[No such file: '$file'] );
1396 next;
1397 }
1398
1399 my $obj = Archive::Tar::File->new( file => $file );
1400 unless( $obj ) {
1401 $self->_error( qq[Unable to add file: '$file'] );
1402 next;
1403 }
1404
1405 push @rv, $obj;
1406 }
1407
1408 push @{$self->{_data}}, @rv;
1409
1410 return @rv;
1411}
1412
1413=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1414
1415Takes a filename, a scalar full of data and optionally a reference to
1416a hash with specific options.
1417
1418Will add a file to the in-memory archive, with name C<$filename> and
1419content C<$data>. Specific properties can be set using C<$opthashref>.
1420The following list of properties is supported: name, size, mtime
1421(last modified date), mode, uid, gid, linkname, uname, gname,
b3200c5d 1422devmajor, devminor, prefix, type. (On MacOS, the file's path and
39713df4
RGS
1423modification times are converted to Unix equivalents.)
1424
b3200c5d
SP
1425Valid values for the file type are the following constants defined in
1426Archive::Tar::Constants:
1427
1428=over 4
1429
1430=item FILE
1431
1432Regular file.
1433
1434=item HARDLINK
1435
1436=item SYMLINK
1437
1438Hard and symbolic ("soft") links; linkname should specify target.
1439
1440=item CHARDEV
1441
1442=item BLOCKDEV
1443
1444Character and block devices. devmajor and devminor should specify the major
1445and minor device numbers.
1446
1447=item DIR
1448
1449Directory.
1450
1451=item FIFO
1452
1453FIFO (named pipe).
1454
1455=item SOCKET
1456
1457Socket.
1458
1459=back
1460
39713df4
RGS
1461Returns the C<Archive::Tar::File> object that was just added, or
1462C<undef> on failure.
1463
1464=cut
1465
1466sub add_data {
1467 my $self = shift;
1468 my ($file, $data, $opt) = @_;
1469
1470 my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1471 unless( $obj ) {
1472 $self->_error( qq[Unable to add file: '$file'] );
1473 return;
1474 }
1475
1476 push @{$self->{_data}}, $obj;
1477
1478 return $obj;
1479}
1480
1481=head2 $tar->error( [$BOOL] )
1482
1483Returns the current errorstring (usually, the last error reported).
1484If a true value was specified, it will give the C<Carp::longmess>
1485equivalent of the error, in effect giving you a stacktrace.
1486
1487For backwards compatibility, this error is also available as
1488C<$Archive::Tar::error> although it is much recommended you use the
1489method call instead.
1490
1491=cut
1492
1493{
1494 $error = '';
1495 my $longmess;
1496
1497 sub _error {
1498 my $self = shift;
1499 my $msg = $error = shift;
1500 $longmess = Carp::longmess($error);
941cb2bb
CBW
1501 if (ref $self) {
1502 $self->{_error} = $error;
1503 $self->{_longmess} = $longmess;
1504 }
39713df4
RGS
1505
1506 ### set Archive::Tar::WARN to 0 to disable printing
1507 ### of errors
1508 if( $WARN ) {
1509 carp $DEBUG ? $longmess : $msg;
1510 }
1511
1512 return;
1513 }
1514
1515 sub error {
1516 my $self = shift;
941cb2bb
CBW
1517 if (ref $self) {
1518 return shift() ? $self->{_longmess} : $self->{_error};
1519 } else {
1520 return shift() ? $longmess : $error;
1521 }
39713df4
RGS
1522 }
1523}
1524
f38c1908
SP
1525=head2 $tar->setcwd( $cwd );
1526
1527C<Archive::Tar> needs to know the current directory, and it will run
e0d68803 1528C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
f38c1908 1529tarfile and saves it in the file system. (As of version 1.30, however,
e0d68803 1530C<Archive::Tar> will use the speed optimization described below
f38c1908
SP
1531automatically, so it's only relevant if you're using C<extract_file()>).
1532
1533Since C<Archive::Tar> doesn't change the current directory internally
1534while it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1535can be avoided if we can guarantee that the current directory doesn't
1536get changed externally.
1537
1538To use this performance boost, set the current directory via
1539
1540 use Cwd;
1541 $tar->setcwd( cwd() );
1542
1543once before calling a function like C<extract_file> and
1544C<Archive::Tar> will use the current directory setting from then on
e0d68803 1545and won't call C<Cwd::cwd()> internally.
f38c1908
SP
1546
1547To switch back to the default behaviour, use
1548
1549 $tar->setcwd( undef );
1550
1551and C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1552
eadbb00b 1553If you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will
f38c1908
SP
1554be called for you.
1555
e0d68803 1556=cut
f38c1908
SP
1557
1558sub setcwd {
1559 my $self = shift;
1560 my $cwd = shift;
1561
1562 $self->{cwd} = $cwd;
1563}
39713df4 1564
39713df4
RGS
1565=head1 Class Methods
1566
642eb381 1567=head2 Archive::Tar->create_archive($file, $compressed, @filelist)
39713df4
RGS
1568
1569Creates a tar file from the list of files provided. The first
1570argument can either be the name of the tar file to create or a
1571reference to an open file handle (e.g. a GLOB reference).
1572
e0d68803 1573The second argument is used to indicate compression. You can either
642eb381 1574compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
e0d68803 1575to be the C<gzip> compression level (between 1 and 9), but the use of
eadbb00b 1576constants is preferred:
642eb381
SH
1577
1578 # write a gzip compressed file
bef46b70 1579 Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist );
642eb381 1580
e0d68803 1581 # write a bzip compressed file
bef46b70 1582 Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist );
39713df4
RGS
1583
1584Note that when you pass in a filehandle, the compression argument
1585is ignored, as all files are printed verbatim to your filehandle.
1586If you wish to enable compression with filehandles, use an
642eb381 1587C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
39713df4
RGS
1588
1589The remaining arguments list the files to be included in the tar file.
1590These files must all exist. Any files which don't exist or can't be
1591read are silently ignored.
1592
1593If the archive creation fails for any reason, C<create_archive> will
1594return false. Please use the C<error> method to find the cause of the
1595failure.
1596
1597Note that this method does not write C<on the fly> as it were; it
1598still reads all the files into memory before writing out the archive.
1599Consult the FAQ below if this is a problem.
1600
1601=cut
1602
1603sub create_archive {
1604 my $class = shift;
1605
1606 my $file = shift; return unless defined $file;
1607 my $gzip = shift || 0;
1608 my @files = @_;
1609
1610 unless( @files ) {
1611 return $class->_error( qq[Cowardly refusing to create empty archive!] );
1612 }
1613
1614 my $tar = $class->new;
1615 $tar->add_files( @files );
1616 return $tar->write( $file, $gzip );
1617}
1618
642eb381
SH
1619=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] )
1620
1621Returns an iterator function that reads the tar file without loading
1622it all in memory. Each time the function is called it will return the
1623next file in the tarball. The files are returned as
1624C<Archive::Tar::File> objects. The iterator function returns the
941cb2bb 1625empty list once it has exhausted the files contained.
642eb381
SH
1626
1627The second argument can be a hash reference with options, which are
1628identical to the arguments passed to C<read()>.
1629
1630Example usage:
1631
1632 my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} );
1633
1634 while( my $f = $next->() ) {
1635 print $f->name, "\n";
1636
1637 $f->extract or warn "Extraction failed";
e0d68803 1638
642eb381
SH
1639 # ....
1640 }
1641
1642=cut
1643
1644
1645sub iter {
1646 my $class = shift;
1647 my $filename = shift or return;
1648 my $compressed = shift or 0;
1649 my $opts = shift || {};
1650
1651 ### get a handle to read from.
1652 my $handle = $class->_get_handle(
e0d68803
JB
1653 $filename,
1654 $compressed,
642eb381
SH
1655 READ_ONLY->( ZLIB )
1656 ) or return;
1657
1658 my @data;
1659 return sub {
1660 return shift(@data) if @data; # more than one file returned?
1661 return unless $handle; # handle exhausted?
1662
1663 ### read data, should only return file
941cb2bb
CBW
1664 my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 });
1665 @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY';
642eb381
SH
1666
1667 ### return one piece of data
1668 return shift(@data) if @data;
e0d68803 1669
642eb381
SH
1670 ### data is exhausted, free the filehandle
1671 undef $handle;
1672 return;
1673 };
1674}
1675
1676=head2 Archive::Tar->list_archive($file, $compressed, [\@properties])
39713df4
RGS
1677
1678Returns a list of the names of all the files in the archive. The
1679first argument can either be the name of the tar file to list or a
1680reference to an open file handle (e.g. a GLOB reference).
1681
1682If C<list_archive()> is passed an array reference as its third
1683argument it returns a list of hash references containing the requested
1684properties of each file. The following list of properties is
e0d68803 1685supported: full_path, name, size, mtime (last modified date), mode,
b3200c5d
SP
1686uid, gid, linkname, uname, gname, devmajor, devminor, prefix.
1687
1688See C<Archive::Tar::File> for details about supported properties.
39713df4
RGS
1689
1690Passing an array reference containing only one element, 'name', is
1691special cased to return a list of names rather than a list of hash
1692references.
1693
1694=cut
1695
1696sub list_archive {
1697 my $class = shift;
1698 my $file = shift; return unless defined $file;
1699 my $gzip = shift || 0;
1700
1701 my $tar = $class->new($file, $gzip);
1702 return unless $tar;
1703
1704 return $tar->list_files( @_ );
1705}
1706
642eb381 1707=head2 Archive::Tar->extract_archive($file, $compressed)
39713df4
RGS
1708
1709Extracts the contents of the tar file. The first argument can either
1710be the name of the tar file to create or a reference to an open file
1711handle (e.g. a GLOB reference). All relative paths in the tar file will
1712be created underneath the current working directory.
1713
1714C<extract_archive> will return a list of files it extracted.
1715If the archive extraction fails for any reason, C<extract_archive>
1716will return false. Please use the C<error> method to find the cause
1717of the failure.
1718
1719=cut
1720
1721sub extract_archive {
1722 my $class = shift;
1723 my $file = shift; return unless defined $file;
1724 my $gzip = shift || 0;
1725
1726 my $tar = $class->new( ) or return;
1727
1728 return $tar->read( $file, $gzip, { extract => 1 } );
1729}
1730
f5695358
JB
1731=head2 $bool = Archive::Tar->has_io_string
1732
1733Returns true if we currently have C<IO::String> support loaded.
1734
e0d68803 1735Either C<IO::String> or C<perlio> support is needed to support writing
f5695358
JB
1736stringified archives. Currently, C<perlio> is the preferred method, if
1737available.
1738
1739See the C<GLOBAL VARIABLES> section to see how to change this preference.
1740
1741=cut
1742
1743sub has_io_string { return $HAS_IO_STRING; }
1744
1745=head2 $bool = Archive::Tar->has_perlio
1746
1747Returns true if we currently have C<perlio> support loaded.
1748
e0d68803 1749This requires C<perl-5.8> or higher, compiled with C<perlio>
f5695358 1750
e0d68803 1751Either C<IO::String> or C<perlio> support is needed to support writing
f5695358
JB
1752stringified archives. Currently, C<perlio> is the preferred method, if
1753available.
1754
1755See the C<GLOBAL VARIABLES> section to see how to change this preference.
1756
1757=cut
1758
1759sub has_perlio { return $HAS_PERLIO; }
1760
1761=head2 $bool = Archive::Tar->has_zlib_support
1762
1763Returns true if C<Archive::Tar> can extract C<zlib> compressed archives
1764
1765=cut
1766
1767sub has_zlib_support { return ZLIB }
1768
1769=head2 $bool = Archive::Tar->has_bzip2_support
1770
1771Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives
1772
1773=cut
1774
1775sub has_bzip2_support { return BZIP }
1776
39713df4
RGS
1777=head2 Archive::Tar->can_handle_compressed_files
1778
1779A simple checking routine, which will return true if C<Archive::Tar>
642eb381
SH
1780is able to uncompress compressed archives on the fly with C<IO::Zlib>
1781and C<IO::Compress::Bzip2> or false if not both are installed.
39713df4
RGS
1782
1783You can use this as a shortcut to determine whether C<Archive::Tar>
1784will do what you think before passing compressed archives to its
1785C<read> method.
1786
1787=cut
1788
642eb381 1789sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 }
39713df4
RGS
1790
1791sub no_string_support {
1792 croak("You have to install IO::String to support writing archives to strings");
1793}
1794
17951;
1796
1797__END__
1798
1799=head1 GLOBAL VARIABLES
1800
1801=head2 $Archive::Tar::FOLLOW_SYMLINK
1802
1803Set this variable to C<1> to make C<Archive::Tar> effectively make a
1804copy of the file when extracting. Default is C<0>, which
1805means the symlink stays intact. Of course, you will have to pack the
1806file linked to as well.
1807
1808This option is checked when you write out the tarfile using C<write>
1809or C<create_archive>.
1810
1811This works just like C</bin/tar>'s C<-h> option.
1812
1813=head2 $Archive::Tar::CHOWN
1814
1815By default, C<Archive::Tar> will try to C<chown> your files if it is
1816able to. In some cases, this may not be desired. In that case, set
1817this variable to C<0> to disable C<chown>-ing, even if it were
1818possible.
1819
1820The default is C<1>.
1821
1822=head2 $Archive::Tar::CHMOD
1823
1824By default, C<Archive::Tar> will try to C<chmod> your files to
1825whatever mode was specified for the particular file in the archive.
1826In some cases, this may not be desired. In that case, set this
1827variable to C<0> to disable C<chmod>-ing.
1828
1829The default is C<1>.
1830
1c82faa7
JB
1831=head2 $Archive::Tar::SAME_PERMISSIONS
1832
1833When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether
1834the permissions on files from the archive are used without modification
1835of if they are filtered by removing any setid bits and applying the
1836current umask.
1837
1838The default is C<1> for the root user and C<0> for normal users.
1839
39713df4
RGS
1840=head2 $Archive::Tar::DO_NOT_USE_PREFIX
1841
e0d68803 1842By default, C<Archive::Tar> will try to put paths that are over
f38c1908 1843100 characters in the C<prefix> field of your tar header, as
e0d68803
JB
1844defined per POSIX-standard. However, some (older) tar programs
1845do not implement this spec. To retain compatibility with these older
1846or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
1847variable to a true value, and C<Archive::Tar> will use an alternate
1848way of dealing with paths over 100 characters by using the
f38c1908
SP
1849C<GNU Extended Header> feature.
1850
1851Note that clients who do not support the C<GNU Extended Header>
1852feature will not be able to read these archives. Such clients include
1853tars on C<Solaris>, C<Irix> and C<AIX>.
39713df4
RGS
1854
1855The default is C<0>.
1856
1857=head2 $Archive::Tar::DEBUG
1858
1859Set this variable to C<1> to always get the C<Carp::longmess> output
1860of the warnings, instead of the regular C<carp>. This is the same
1861message you would get by doing:
1862
1863 $tar->error(1);
1864
1865Defaults to C<0>.
1866
1867=head2 $Archive::Tar::WARN
1868
1869Set this variable to C<0> if you do not want any warnings printed.
1870Personally I recommend against doing this, but people asked for the
1871option. Also, be advised that this is of course not threadsafe.
1872
1873Defaults to C<1>.
1874
1875=head2 $Archive::Tar::error
1876
1877Holds the last reported error. Kept for historical reasons, but its
1878use is very much discouraged. Use the C<error()> method instead:
1879
1880 warn $tar->error unless $tar->extract;
1881
941cb2bb
CBW
1882Note that in older versions of this module, the C<error()> method
1883would return an effectively global value even when called an instance
1884method as above. This has since been fixed, and multiple instances of
1885C<Archive::Tar> now have separate error strings.
1886
178aef9a
RGS
1887=head2 $Archive::Tar::INSECURE_EXTRACT_MODE
1888
1889This variable indicates whether C<Archive::Tar> should allow
1890files to be extracted outside their current working directory.
1891
1892Allowing this could have security implications, as a malicious
1893tar archive could alter or replace any file the extracting user
e0d68803
JB
1894has permissions to. Therefor, the default is to not allow
1895insecure extractions.
178aef9a 1896
e0d68803
JB
1897If you trust the archive, or have other reasons to allow the
1898archive to write files outside your current working directory,
178aef9a
RGS
1899set this variable to C<true>.
1900
1901Note that this is a backwards incompatible change from version
1902C<1.36> and before.
1903
39713df4
RGS
1904=head2 $Archive::Tar::HAS_PERLIO
1905
e0d68803 1906This variable holds a boolean indicating if we currently have
39713df4 1907C<perlio> support loaded. This will be enabled for any perl
e0d68803 1908greater than C<5.8> compiled with C<perlio>.
39713df4
RGS
1909
1910If you feel strongly about disabling it, set this variable to
1911C<false>. Note that you will then need C<IO::String> installed
1912to support writing stringified archives.
1913
1914Don't change this variable unless you B<really> know what you're
1915doing.
1916
1917=head2 $Archive::Tar::HAS_IO_STRING
1918
e0d68803 1919This variable holds a boolean indicating if we currently have
39713df4
RGS
1920C<IO::String> support loaded. This will be enabled for any perl
1921that has a loadable C<IO::String> module.
1922
1923If you feel strongly about disabling it, set this variable to
1924C<false>. Note that you will then need C<perlio> support from
1925your perl to be able to write stringified archives.
1926
1927Don't change this variable unless you B<really> know what you're
1928doing.
1929
d33cd7cf
CBW
1930=head2 $Archive::Tar::ZERO_PAD_NUMBERS
1931
1932This variable holds a boolean indicating if we will create
1933zero padded numbers for C<size>, C<mtime> and C<checksum>.
1934The default is C<0>, indicating that we will create space padded
1935numbers. Added for compatibility with C<busybox> implementations.
1936
39713df4
RGS
1937=head1 FAQ
1938
1939=over 4
1940
1941=item What's the minimum perl version required to run Archive::Tar?
1942
1943You will need perl version 5.005_03 or newer.
1944
1945=item Isn't Archive::Tar slow?
1946
1947Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
1948However, it's very portable. If speed is an issue, consider using
1949C</bin/tar> instead.
1950
1951=item Isn't Archive::Tar heavier on memory than /bin/tar?
1952
1953Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
1954C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
1955choice but to read the archive into memory.
1956This is ok if you want to do in-memory manipulation of the archive.
642eb381 1957
39713df4
RGS
1958If you just want to extract, use the C<extract_archive> class method
1959instead. It will optimize and write to disk immediately.
1960
642eb381
SH
1961Another option is to use the C<iter> class method to iterate over
1962the files in the tarball without reading them all in memory at once.
1963
1964=item Can you lazy-load data instead?
39713df4 1965
642eb381
SH
1966In some cases, yes. You can use the C<iter> class method to iterate
1967over the files in the tarball without reading them all in memory at once.
39713df4
RGS
1968
1969=item How much memory will an X kb tar file need?
1970
1971Probably more than X kb, since it will all be read into memory. If
1972this is a problem, and you don't need to do in memory manipulation
e0d68803 1973of the archive, consider using the C<iter> class method, or C</bin/tar>
642eb381 1974instead.
39713df4
RGS
1975
1976=item What do you do with unsupported filetypes in an archive?
1977
1978C<Unix> has a few filetypes that aren't supported on other platforms,
1979like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
1980try to make a copy of the original file, rather than throwing an error.
1981
1982This does require you to read the entire archive in to memory first,
1983since otherwise we wouldn't know what data to fill the copy with.
e0d68803
JB
1984(This means that you cannot use the class methods, including C<iter>
1985on archives that have incompatible filetypes and still expect things
642eb381 1986to work).
39713df4
RGS
1987
1988For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
1989the extraction of this particular item didn't work.
1990
f38c1908
SP
1991=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
1992
1993By default, C<Archive::Tar> is in a completely POSIX-compatible
1994mode, which uses the POSIX-specification of C<tar> to store files.
eadbb00b 1995For paths greater than 100 characters, this is done using the
f38c1908
SP
1996C<POSIX header prefix>. Non-POSIX-compatible clients may not support
1997this part of the specification, and may only support the C<GNU Extended
1998Header> functionality. To facilitate those clients, you can set the
e0d68803 1999C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
f38c1908
SP
2000C<GLOBAL VARIABLES> section for details on this variable.
2001
c3745331
RGS
2002Note that GNU tar earlier than version 1.14 does not cope well with
2003the C<POSIX header prefix>. If you use such a version, consider setting
2004the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
2005
b30bcf62
RGS
2006=item How do I extract only files that have property X from an archive?
2007
2008Sometimes, you might not wish to extract a complete archive, just
2009the files that are relevant to you, based on some criteria.
2010
2011You can do this by filtering a list of C<Archive::Tar::File> objects
2012based on your criteria. For example, to extract only files that have
2013the string C<foo> in their title, you would use:
2014
e0d68803 2015 $tar->extract(
b30bcf62 2016 grep { $_->full_path =~ /foo/ } $tar->get_files
e0d68803 2017 );
b30bcf62
RGS
2018
2019This way, you can filter on any attribute of the files in the archive.
2020Consult the C<Archive::Tar::File> documentation on how to use these
2021objects.
2022
81a5970e
RGS
2023=item How do I access .tar.Z files?
2024
2025The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
2026the C<IO::Zlib> module) to access tar files that have been compressed
2027with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
2028utility cannot be read by C<Compress::Zlib> and so cannot be directly
2029accesses by C<Archive::Tar>.
2030
2031If the C<uncompress> or C<gunzip> programs are available, you can use
2032one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
2033
2034Firstly with C<uncompress>
2035
2036 use Archive::Tar;
2037
2038 open F, "uncompress -c $filename |";
2039 my $tar = Archive::Tar->new(*F);
2040 ...
2041
2042and this with C<gunzip>
2043
2044 use Archive::Tar;
2045
2046 open F, "gunzip -c $filename |";
2047 my $tar = Archive::Tar->new(*F);
2048 ...
2049
2050Similarly, if the C<compress> program is available, you can use this to
2051write a C<.tar.Z> file
2052
2053 use Archive::Tar;
2054 use IO::File;
2055
2056 my $fh = new IO::File "| compress -c >$filename";
2057 my $tar = Archive::Tar->new();
2058 ...
2059 $tar->write($fh);
2060 $fh->close ;
2061
01d11a1c
SP
2062=item How do I handle Unicode strings?
2063
2064C<Archive::Tar> uses byte semantics for any files it reads from or writes
2065to disk. This is not a problem if you only deal with files and never
2066look at their content or work solely with byte strings. But if you use
2067Unicode strings with character semantics, some additional steps need
2068to be taken.
2069
2070For example, if you add a Unicode string like
2071
2072 # Problem
2073 $tar->add_data('file.txt', "Euro: \x{20AC}");
2074
2075then there will be a problem later when the tarfile gets written out
2076to disk via C<$tar->write()>:
2077
2078 Wide character in print at .../Archive/Tar.pm line 1014.
2079
2080The data was added as a Unicode string and when writing it out to disk,
2081the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
2082tried to convert the string to ISO-8859 and failed. The written file
2083now contains garbage.
2084
2085For this reason, Unicode strings need to be converted to UTF-8-encoded
2086bytestrings before they are handed off to C<add_data()>:
2087
2088 use Encode;
2089 my $data = "Accented character: \x{20AC}";
2090 $data = encode('utf8', $data);
2091
2092 $tar->add_data('file.txt', $data);
2093
e0d68803 2094A opposite problem occurs if you extract a UTF8-encoded file from a
01d11a1c
SP
2095tarball. Using C<get_content()> on the C<Archive::Tar::File> object
2096will return its content as a bytestring, not as a Unicode string.
2097
2098If you want it to be a Unicode string (because you want character
2099semantics with operations like regular expression matching), you need
e0d68803 2100to decode the UTF8-encoded content and have Perl convert it into
01d11a1c
SP
2101a Unicode string:
2102
2103 use Encode;
2104 my $data = $tar->get_content();
e0d68803 2105
01d11a1c
SP
2106 # Make it a Unicode string
2107 $data = decode('utf8', $data);
2108
e0d68803 2109There is no easy way to provide this functionality in C<Archive::Tar>,
01d11a1c
SP
2110because a tarball can contain many files, and each of which could be
2111encoded in a different way.
81a5970e 2112
39713df4
RGS
2113=back
2114
f475b4a2
JB
2115=head1 CAVEATS
2116
2117The AIX tar does not fill all unused space in the tar archive with 0x00.
2118This sometimes leads to warning messages from C<Archive::Tar>.
2119
2120 Invalid header block at offset nnn
2121
2122A fix for that problem is scheduled to be released in the following levels
2123of AIX, all of which should be coming out in the 4th quarter of 2009:
2124
2125 AIX 5.3 TL7 SP10
2126 AIX 5.3 TL8 SP8
2127 AIX 5.3 TL9 SP5
2128 AIX 5.3 TL10 SP2
2129
2130 AIX 6.1 TL0 SP11
2131 AIX 6.1 TL1 SP7
2132 AIX 6.1 TL2 SP6
2133 AIX 6.1 TL3 SP3
2134
2135The IBM APAR number for this problem is IZ50240 (Reported component ID:
21365765G0300 / AIX 5.3). It is possible to get an ifix for that problem.
2137If you need an ifix please contact your local IBM AIX support.
2138
39713df4
RGS
2139=head1 TODO
2140
2141=over 4
2142
2143=item Check if passed in handles are open for read/write
2144
2145Currently I don't know of any portable pure perl way to do this.
2146Suggestions welcome.
2147
b3200c5d
SP
2148=item Allow archives to be passed in as string
2149
2150Currently, we only allow opened filehandles or filenames, but
2151not strings. The internals would need some reworking to facilitate
2152stringified archives.
2153
2154=item Facilitate processing an opened filehandle of a compressed archive
2155
2156Currently, we only support this if the filehandle is an IO::Zlib object.
2157Environments, like apache, will present you with an opened filehandle
2158to an uploaded file, which might be a compressed archive.
2159
39713df4
RGS
2160=back
2161
f38c1908
SP
2162=head1 SEE ALSO
2163
2164=over 4
2165
2166=item The GNU tar specification
2167
2168C<http://www.gnu.org/software/tar/manual/tar.html>
2169
eadbb00b 2170=item The PAX format specification
f38c1908 2171
eadbb00b 2172The specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
f38c1908
SP
2173
2174=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>
2175
2176=item GNU tar intends to switch to POSIX compatibility
2177
2178GNU Tar authors have expressed their intention to become completely
2179POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html>
2180
2181=item A Comparison between various tar implementations
2182
2183Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs>
2184
2185=back
2186
39713df4
RGS
2187=head1 AUTHOR
2188
c3745331
RGS
2189This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
2190
2191Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
39713df4
RGS
2192
2193=head1 ACKNOWLEDGEMENTS
2194
f475b4a2
JB
2195Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas,
2196Rainer Tammer and especially Andrew Savige for their help and suggestions.
39713df4
RGS
2197
2198=head1 COPYRIGHT
2199
f475b4a2 2200This module is copyright (c) 2002 - 2009 Jos Boumans
c3745331 2201E<lt>kane@cpan.orgE<gt>. All rights reserved.
39713df4 2202
e0d68803 2203This library is free software; you may redistribute and/or modify
c3745331 2204it under the same terms as Perl itself.
39713df4
RGS
2205
2206=cut