This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update IPC-Cmd to CPAN version 0.60
[perl5.git] / cpan / Archive-Tar / lib / Archive / Tar.pm
CommitLineData
39713df4 1### the gnu tar specification:
f38c1908 2### http://www.gnu.org/software/tar/manual/tar.html
39713df4
RGS
3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
642eb381
SH
10use Cwd;
11use IO::Zlib;
12use IO::File;
13use Carp qw(carp croak);
14use File::Spec ();
15use File::Spec::Unix ();
16use File::Path ();
17
18use Archive::Tar::File;
19use Archive::Tar::Constant;
20
21require Exporter;
22
39713df4
RGS
23use strict;
24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
1c82faa7 25 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS
d33cd7cf 26 $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT
178aef9a
RGS
27 ];
28
642eb381 29@ISA = qw[Exporter];
bef46b70 30@EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP ];
178aef9a
RGS
31$DEBUG = 0;
32$WARN = 1;
33$FOLLOW_SYMLINK = 0;
d33cd7cf 34$VERSION = "1.62";
178aef9a
RGS
35$CHOWN = 1;
36$CHMOD = 1;
1c82faa7 37$SAME_PERMISSIONS = $> == 0 ? 1 : 0;
178aef9a
RGS
38$DO_NOT_USE_PREFIX = 0;
39$INSECURE_EXTRACT_MODE = 0;
d33cd7cf 40$ZERO_PAD_NUMBERS = 0;
39713df4
RGS
41
42BEGIN {
43 use Config;
44 $HAS_PERLIO = $Config::Config{useperlio};
45
46 ### try and load IO::String anyway, so you can dynamically
47 ### switch between perlio and IO::String
642eb381 48 $HAS_IO_STRING = eval {
39713df4
RGS
49 require IO::String;
50 import IO::String;
642eb381
SH
51 1;
52 } || 0;
39713df4
RGS
53}
54
39713df4
RGS
55=head1 NAME
56
57Archive::Tar - module for manipulations of tar archives
58
59=head1 SYNOPSIS
60
61 use Archive::Tar;
62 my $tar = Archive::Tar->new;
63
642eb381 64 $tar->read('origin.tgz');
39713df4
RGS
65 $tar->extract();
66
67 $tar->add_files('file/foo.pl', 'docs/README');
68 $tar->add_data('file/baz.txt', 'This is the contents now');
69
70 $tar->rename('oldname', 'new/file/name');
71
642eb381 72 $tar->write('files.tar'); # plain tar
bef46b70
JB
73 $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed
74 $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed
39713df4
RGS
75
76=head1 DESCRIPTION
77
78Archive::Tar provides an object oriented mechanism for handling tar
79files. It provides class methods for quick and easy files handling
80while also allowing for the creation of tar file objects for custom
81manipulation. If you have the IO::Zlib module installed,
82Archive::Tar will also support compressed or gzipped tar files.
83
84An object of class Archive::Tar represents a .tar(.gz) archive full
85of files and things.
86
87=head1 Object Methods
88
89=head2 Archive::Tar->new( [$file, $compressed] )
90
91Returns a new Tar object. If given any arguments, C<new()> calls the
92C<read()> method automatically, passing on the arguments provided to
93the C<read()> method.
94
95If C<new()> is invoked with arguments and the C<read()> method fails
96for any reason, C<new()> returns undef.
97
98=cut
99
100my $tmpl = {
101 _data => [ ],
102 _file => 'Unknown',
103};
104
105### install get/set accessors for this object.
106for my $key ( keys %$tmpl ) {
107 no strict 'refs';
108 *{__PACKAGE__."::$key"} = sub {
109 my $self = shift;
110 $self->{$key} = $_[0] if @_;
111 return $self->{$key};
112 }
113}
114
115sub new {
116 my $class = shift;
117 $class = ref $class if ref $class;
118
119 ### copying $tmpl here since a shallow copy makes it use the
120 ### same aref, causing for files to remain in memory always.
941cb2bb 121 my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class;
39713df4
RGS
122
123 if (@_) {
81a5970e
RGS
124 unless ( $obj->read( @_ ) ) {
125 $obj->_error(qq[No data could be read from file]);
126 return;
127 }
39713df4
RGS
128 }
129
130 return $obj;
131}
132
642eb381 133=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] )
39713df4
RGS
134
135Read the given tar file into memory.
136The first argument can either be the name of a file or a reference to
137an already open filehandle (or an IO::Zlib object if it's compressed)
39713df4
RGS
138
139The C<read> will I<replace> any previous content in C<$tar>!
140
e0d68803 141The second argument may be considered optional, but remains for
642eb381
SH
142backwards compatibility. Archive::Tar now looks at the file
143magic to determine what class should be used to open the file
144and will transparently Do The Right Thing.
145
146Archive::Tar will warn if you try to pass a bzip2 compressed file and the
147IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return.
39713df4 148
b3200c5d 149Note that you can currently B<not> pass a C<gzip> compressed
642eb381
SH
150filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed
151filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string
b3200c5d
SP
152containing the full archive information (either compressed or
153uncompressed). These are worth while features, but not currently
154implemented. See the C<TODO> section.
155
39713df4
RGS
156The third argument can be a hash reference with options. Note that
157all options are case-sensitive.
158
159=over 4
160
161=item limit
162
163Do not read more than C<limit> files. This is useful if you have
164very big archives, and are only interested in the first few files.
165
642eb381
SH
166=item filter
167
168Can be set to a regular expression. Only files with names that match
169the expression will be read.
170
39713df4
RGS
171=item extract
172
173If set to true, immediately extract entries when reading them. This
174gives you the same memory break as the C<extract_archive> function.
175Note however that entries will not be read into memory, but written
e0d68803 176straight to disk. This means no C<Archive::Tar::File> objects are
642eb381 177created for you to inspect.
39713df4
RGS
178
179=back
180
181All files are stored internally as C<Archive::Tar::File> objects.
182Please consult the L<Archive::Tar::File> documentation for details.
183
184Returns the number of files read in scalar context, and a list of
185C<Archive::Tar::File> objects in list context.
186
187=cut
188
189sub read {
190 my $self = shift;
191 my $file = shift;
192 my $gzip = shift || 0;
193 my $opts = shift || {};
194
195 unless( defined $file ) {
196 $self->_error( qq[No file to read from!] );
197 return;
198 } else {
199 $self->_file( $file );
200 }
201
202 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
203 or return;
204
205 my $data = $self->_read_tar( $handle, $opts ) or return;
206
207 $self->_data( $data );
208
209 return wantarray ? @$data : scalar @$data;
210}
211
212sub _get_handle {
642eb381
SH
213 my $self = shift;
214 my $file = shift; return unless defined $file;
215 return $file if ref $file;
216 my $compress = shift || 0;
217 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only
218
219
220 ### get a FH opened to the right class, so we can use it transparently
221 ### throughout the program
222 my $fh;
223 { ### reading magic only makes sense if we're opening a file for
224 ### reading. otherwise, just use what the user requested.
225 my $magic = '';
226 if( MODE_READ->($mode) ) {
227 open my $tmp, $file or do {
228 $self->_error( qq[Could not open '$file' for reading: $!] );
229 return;
230 };
e0d68803 231
642eb381
SH
232 ### read the first 4 bites of the file to figure out which class to
233 ### use to open the file.
e0d68803 234 sysread( $tmp, $magic, 4 );
642eb381
SH
235 close $tmp;
236 }
39713df4 237
642eb381
SH
238 ### is it bzip?
239 ### if you asked specifically for bzip compression, or if we're in
240 ### read mode and the magic numbers add up, use bzip
241 if( BZIP and (
e0d68803 242 ($compress eq COMPRESS_BZIP) or
642eb381
SH
243 ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM )
244 )
245 ) {
e0d68803 246
642eb381
SH
247 ### different reader/writer modules, different error vars... sigh
248 if( MODE_READ->($mode) ) {
249 $fh = IO::Uncompress::Bunzip2->new( $file ) or do {
250 $self->_error( qq[Could not read '$file': ] .
251 $IO::Uncompress::Bunzip2::Bunzip2Error
252 );
253 return;
254 };
e0d68803 255
642eb381
SH
256 } else {
257 $fh = IO::Compress::Bzip2->new( $file ) or do {
258 $self->_error( qq[Could not write to '$file': ] .
259 $IO::Compress::Bzip2::Bzip2Error
260 );
261 return;
262 };
263 }
e0d68803 264
642eb381
SH
265 ### is it gzip?
266 ### if you asked for compression, if you wanted to read or the gzip
267 ### magic number is present (redundant with read)
268 } elsif( ZLIB and (
269 $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM
e0d68803 270 )
642eb381
SH
271 ) {
272 $fh = IO::Zlib->new;
39713df4 273
642eb381
SH
274 unless( $fh->open( $file, $mode ) ) {
275 $self->_error(qq[Could not create filehandle for '$file': $!]);
276 return;
277 }
e0d68803 278
642eb381 279 ### is it plain tar?
39713df4 280 } else {
642eb381 281 $fh = IO::File->new;
39713df4 282
642eb381
SH
283 unless( $fh->open( $file, $mode ) ) {
284 $self->_error(qq[Could not create filehandle for '$file': $!]);
285 return;
286 }
39713df4 287
642eb381
SH
288 ### enable bin mode on tar archives
289 binmode $fh;
e0d68803 290 }
642eb381 291 }
39713df4
RGS
292
293 return $fh;
294}
295
642eb381 296
39713df4
RGS
297sub _read_tar {
298 my $self = shift;
299 my $handle = shift or return;
300 my $opts = shift || {};
301
302 my $count = $opts->{limit} || 0;
642eb381 303 my $filter = $opts->{filter};
39713df4
RGS
304 my $extract = $opts->{extract} || 0;
305
306 ### set a cap on the amount of files to extract ###
307 my $limit = 0;
308 $limit = 1 if $count > 0;
309
310 my $tarfile = [ ];
311 my $chunk;
312 my $read = 0;
313 my $real_name; # to set the name of a file when
314 # we're encountering @longlink
315 my $data;
316
317 LOOP:
318 while( $handle->read( $chunk, HEAD ) ) {
319 ### IO::Zlib doesn't support this yet
320 my $offset = eval { tell $handle } || 'unknown';
d33cd7cf 321 $@ = '';
39713df4
RGS
322
323 unless( $read++ ) {
324 my $gzip = GZIP_MAGIC_NUM;
325 if( $chunk =~ /$gzip/ ) {
326 $self->_error( qq[Cannot read compressed format in tar-mode] );
327 return;
328 }
4feb3b72
JB
329
330 ### size is < HEAD, which means a corrupted file, as the minimum
331 ### length is _at least_ HEAD
332 if (length $chunk != HEAD) {
333 $self->_error( qq[Cannot read enough bytes from the tarfile] );
334 return;
335 }
39713df4
RGS
336 }
337
338 ### if we can't read in all bytes... ###
339 last if length $chunk != HEAD;
340
341 ### Apparently this should really be two blocks of 512 zeroes,
342 ### but GNU tar sometimes gets it wrong. See comment in the
343 ### source code (tar.c) to GNU cpio.
344 next if $chunk eq TAR_END;
345
b30bcf62
RGS
346 ### according to the posix spec, the last 12 bytes of the header are
347 ### null bytes, to pad it to a 512 byte block. That means if these
348 ### bytes are NOT null bytes, it's a corrrupt header. See:
349 ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
350 ### line 111
351 { my $nulls = join '', "\0" x 12;
352 unless( $nulls eq substr( $chunk, 500, 12 ) ) {
353 $self->_error( qq[Invalid header block at offset $offset] );
354 next LOOP;
355 }
356 }
357
81a5970e
RGS
358 ### pass the realname, so we can set it 'proper' right away
359 ### some of the heuristics are done on the name, so important
360 ### to set it ASAP
39713df4 361 my $entry;
81a5970e
RGS
362 { my %extra_args = ();
363 $extra_args{'name'} = $$real_name if defined $real_name;
e0d68803
JB
364
365 unless( $entry = Archive::Tar::File->new( chunk => $chunk,
366 %extra_args )
81a5970e
RGS
367 ) {
368 $self->_error( qq[Couldn't read chunk at offset $offset] );
b30bcf62 369 next LOOP;
81a5970e 370 }
39713df4
RGS
371 }
372
373 ### ignore labels:
d33cd7cf 374 ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159
39713df4
RGS
375 next if $entry->is_label;
376
377 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
378
379 if ( $entry->is_file && !$entry->validate ) {
380 ### sometimes the chunk is rather fux0r3d and a whole 512
c3745331 381 ### bytes ends up in the ->name area.
39713df4
RGS
382 ### clean it up, if need be
383 my $name = $entry->name;
384 $name = substr($name, 0, 100) if length $name > 100;
385 $name =~ s/\n/ /g;
386
387 $self->_error( $name . qq[: checksum error] );
388 next LOOP;
389 }
390
391 my $block = BLOCK_SIZE->( $entry->size );
392
393 $data = $entry->get_content_by_ref;
394
395 ### just read everything into memory
396 ### can't do lazy loading since IO::Zlib doesn't support 'seek'
397 ### this is because Compress::Zlib doesn't support it =/
398 ### this reads in the whole data in one read() call.
399 if( $handle->read( $$data, $block ) < $block ) {
400 $self->_error( qq[Read error on tarfile (missing data) '].
401 $entry->full_path ."' at offset $offset" );
b30bcf62 402 next LOOP;
39713df4
RGS
403 }
404
405 ### throw away trailing garbage ###
376cc5ea 406 substr ($$data, $entry->size) = "" if defined $$data;
39713df4
RGS
407
408 ### part II of the @LongLink munging -- need to do /after/
409 ### the checksum check.
410 if( $entry->is_longlink ) {
411 ### weird thing in tarfiles -- if the file is actually a
412 ### @LongLink, the data part seems to have a trailing ^@
413 ### (unprintable) char. to display, pipe output through less.
414 ### but that doesn't *always* happen.. so check if the last
415 ### character is a control character, and if so remove it
416 ### at any rate, we better remove that character here, or tests
417 ### like 'eq' and hashlook ups based on names will SO not work
418 ### remove it by calculating the proper size, and then
419 ### tossing out everything that's longer than that size.
420
421 ### count number of nulls
422 my $nulls = $$data =~ tr/\0/\0/;
423
424 ### cut data + size by that many bytes
425 $entry->size( $entry->size - $nulls );
426 substr ($$data, $entry->size) = "";
427 }
428 }
429
430 ### clean up of the entries.. posix tar /apparently/ has some
431 ### weird 'feature' that allows for filenames > 255 characters
432 ### they'll put a header in with as name '././@LongLink' and the
433 ### contents will be the name of the /next/ file in the archive
434 ### pretty crappy and kludgy if you ask me
435
436 ### set the name for the next entry if this is a @LongLink;
437 ### this is one ugly hack =/ but needed for direct extraction
438 if( $entry->is_longlink ) {
439 $real_name = $data;
b30bcf62 440 next LOOP;
39713df4
RGS
441 } elsif ( defined $real_name ) {
442 $entry->name( $$real_name );
443 $entry->prefix('');
444 undef $real_name;
445 }
446
642eb381
SH
447 ### skip this entry if we're filtering
448 if ($filter && $entry->name !~ $filter) {
449 next LOOP;
e0d68803 450
642eb381
SH
451 ### skip this entry if it's a pax header. This is a special file added
452 ### by, among others, git-generated tarballs. It holds comments and is
e0d68803 453 ### not meant for extracting. See #38932: pax_global_header extracted
642eb381
SH
454 } elsif ( $entry->name eq PAX_HEADER ) {
455 next LOOP;
456 }
e0d68803 457
d33cd7cf
CBW
458 if ( $extract && !$entry->is_longlink
459 && !$entry->is_unknown
460 && !$entry->is_label ) {
461 $self->_extract_file( $entry ) or return;
462 }
39713df4
RGS
463
464 ### Guard against tarfiles with garbage at the end
465 last LOOP if $entry->name eq '';
466
467 ### push only the name on the rv if we're extracting
468 ### -- for extract_archive
469 push @$tarfile, ($extract ? $entry->name : $entry);
470
471 if( $limit ) {
472 $count-- unless $entry->is_longlink || $entry->is_dir;
473 last LOOP unless $count;
474 }
475 } continue {
476 undef $data;
477 }
478
479 return $tarfile;
480}
481
482=head2 $tar->contains_file( $filename )
483
484Check if the archive contains a certain file.
485It will return true if the file is in the archive, false otherwise.
486
487Note however, that this function does an exact match using C<eq>
488on the full path. So it cannot compensate for case-insensitive file-
489systems or compare 2 paths to see if they would point to the same
490underlying file.
491
492=cut
493
494sub contains_file {
495 my $self = shift;
01d11a1c 496 my $full = shift;
e0d68803 497
01d11a1c 498 return unless defined $full;
39713df4 499
c3745331
RGS
500 ### don't warn if the entry isn't there.. that's what this function
501 ### is for after all.
502 local $WARN = 0;
39713df4
RGS
503 return 1 if $self->_find_entry($full);
504 return;
505}
506
507=head2 $tar->extract( [@filenames] )
508
509Write files whose names are equivalent to any of the names in
510C<@filenames> to disk, creating subdirectories as necessary. This
511might not work too well under VMS.
512Under MacPerl, the file's modification time will be converted to the
513MacOS zero of time, and appropriate conversions will be done to the
514path. However, the length of each element of the path is not
515inspected to see whether it's longer than MacOS currently allows (32
516characters).
517
518If C<extract> is called without a list of file names, the entire
519contents of the archive are extracted.
520
521Returns a list of filenames extracted.
522
523=cut
524
525sub extract {
526 my $self = shift;
b30bcf62 527 my @args = @_;
39713df4
RGS
528 my @files;
529
f38c1908
SP
530 # use the speed optimization for all extracted files
531 local($self->{cwd}) = cwd() unless $self->{cwd};
532
39713df4 533 ### you requested the extraction of only certian files
b30bcf62
RGS
534 if( @args ) {
535 for my $file ( @args ) {
e0d68803 536
b30bcf62
RGS
537 ### it's already an object?
538 if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
539 push @files, $file;
540 next;
39713df4 541
b30bcf62
RGS
542 ### go find it then
543 } else {
e0d68803 544
b30bcf62
RGS
545 my $found;
546 for my $entry ( @{$self->_data} ) {
547 next unless $file eq $entry->full_path;
e0d68803 548
b30bcf62
RGS
549 ### we found the file you're looking for
550 push @files, $entry;
551 $found++;
552 }
e0d68803 553
b30bcf62 554 unless( $found ) {
e0d68803 555 return $self->_error(
b30bcf62
RGS
556 qq[Could not find '$file' in archive] );
557 }
39713df4
RGS
558 }
559 }
560
561 ### just grab all the file items
562 } else {
563 @files = $self->get_files;
564 }
565
566 ### nothing found? that's an error
567 unless( scalar @files ) {
568 $self->_error( qq[No files found for ] . $self->_file );
569 return;
570 }
571
572 ### now extract them
573 for my $entry ( @files ) {
574 unless( $self->_extract_file( $entry ) ) {
575 $self->_error(q[Could not extract ']. $entry->full_path .q['] );
576 return;
577 }
578 }
579
580 return @files;
581}
582
583=head2 $tar->extract_file( $file, [$extract_path] )
584
585Write an entry, whose name is equivalent to the file name provided to
48e76d2d 586disk. Optionally takes a second parameter, which is the full native
39713df4
RGS
587path (including filename) the entry will be written to.
588
589For example:
590
591 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
592
b30bcf62
RGS
593 $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' );
594
39713df4
RGS
595Returns true on success, false on failure.
596
597=cut
598
599sub extract_file {
600 my $self = shift;
01d11a1c 601 my $file = shift; return unless defined $file;
39713df4
RGS
602 my $alt = shift;
603
604 my $entry = $self->_find_entry( $file )
605 or $self->_error( qq[Could not find an entry for '$file'] ), return;
606
607 return $self->_extract_file( $entry, $alt );
608}
609
610sub _extract_file {
611 my $self = shift;
612 my $entry = shift or return;
613 my $alt = shift;
39713df4
RGS
614
615 ### you wanted an alternate extraction location ###
616 my $name = defined $alt ? $alt : $entry->full_path;
617
618 ### splitpath takes a bool at the end to indicate
619 ### that it's splitting a dir
7f10f74b
SH
620 my ($vol,$dirs,$file);
621 if ( defined $alt ) { # It's a local-OS path
622 ($vol,$dirs,$file) = File::Spec->splitpath( $alt,
623 $entry->is_dir );
624 } else {
625 ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
626 $entry->is_dir );
627 }
628
39713df4
RGS
629 my $dir;
630 ### is $name an absolute path? ###
642eb381 631 if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) {
178aef9a
RGS
632
633 ### absolute names are not allowed to be in tarballs under
634 ### strict mode, so only allow it if a user tells us to do it
635 if( not defined $alt and not $INSECURE_EXTRACT_MODE ) {
e0d68803 636 $self->_error(
178aef9a
RGS
637 q[Entry ']. $entry->full_path .q[' is an absolute path. ].
638 q[Not extracting absolute paths under SECURE EXTRACT MODE]
e0d68803 639 );
178aef9a
RGS
640 return;
641 }
e0d68803 642
178aef9a 643 ### user asked us to, it's fine.
642eb381 644 $dir = File::Spec->catpath( $vol, $dirs, "" );
39713df4
RGS
645
646 ### it's a relative path ###
647 } else {
e0d68803
JB
648 my $cwd = (ref $self and defined $self->{cwd})
649 ? $self->{cwd}
642eb381 650 : cwd();
f5afd28d 651
f5afd28d
NC
652 my @dirs = defined $alt
653 ? File::Spec->splitdir( $dirs ) # It's a local-OS path
654 : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely
655 # straight from the tarball
178aef9a 656
e0d68803
JB
657 if( not defined $alt and
658 not $INSECURE_EXTRACT_MODE
659 ) {
642eb381
SH
660
661 ### paths that leave the current directory are not allowed under
662 ### strict mode, so only allow it if a user tells us to do this.
663 if( grep { $_ eq '..' } @dirs ) {
e0d68803 664
642eb381
SH
665 $self->_error(
666 q[Entry ']. $entry->full_path .q[' is attempting to leave ].
667 q[the current working directory. Not extracting under ].
668 q[SECURE EXTRACT MODE]
669 );
670 return;
e0d68803
JB
671 }
672
642eb381
SH
673 ### the archive may be asking us to extract into a symlink. This
674 ### is not sane and a possible security issue, as outlined here:
675 ### https://rt.cpan.org/Ticket/Display.html?id=30380
676 ### https://bugzilla.redhat.com/show_bug.cgi?id=295021
677 ### https://issues.rpath.com/browse/RPL-1716
678 my $full_path = $cwd;
679 for my $d ( @dirs ) {
680 $full_path = File::Spec->catdir( $full_path, $d );
e0d68803 681
642eb381
SH
682 ### we've already checked this one, and it's safe. Move on.
683 next if ref $self and $self->{_link_cache}->{$full_path};
684
685 if( -l $full_path ) {
686 my $to = readlink $full_path;
687 my $diag = "symlinked directory ($full_path => $to)";
688
689 $self->_error(
690 q[Entry ']. $entry->full_path .q[' is attempting to ].
691 qq[extract to a $diag. This is considered a security ].
692 q[vulnerability and not allowed under SECURE EXTRACT ].
693 q[MODE]
694 );
695 return;
696 }
e0d68803 697
642eb381
SH
698 ### XXX keep a cache if possible, so the stats become cheaper:
699 $self->{_link_cache}->{$full_path} = 1 if ref $self;
700 }
701 }
702
2610e7a4
JB
703 ### '.' is the directory delimiter on VMS, which has to be escaped
704 ### or changed to '_' on vms. vmsify is used, because older versions
705 ### of vmspath do not handle this properly.
706 ### Must not add a '/' to an empty directory though.
e0d68803 707 map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS;
f5afd28d 708
e0d68803 709 my ($cwd_vol,$cwd_dir,$cwd_file)
48e76d2d
CB
710 = File::Spec->splitpath( $cwd );
711 my @cwd = File::Spec->splitdir( $cwd_dir );
712 push @cwd, $cwd_file if length $cwd_file;
81a5970e 713
f5afd28d
NC
714 ### We need to pass '' as the last elemant to catpath. Craig Berry
715 ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
e0d68803 716 ### The root problem is that splitpath on UNIX always returns the
f5afd28d
NC
717 ### final path element as a file even if it is a directory, and of
718 ### course there is no way it can know the difference without checking
719 ### against the filesystem, which it is documented as not doing. When
720 ### you turn around and call catpath, on VMS you have to know which bits
721 ### are directory bits and which bits are file bits. In this case we
722 ### know the result should be a directory. I had thought you could omit
723 ### the file argument to catpath in such a case, but apparently on UNIX
724 ### you can't.
e0d68803
JB
725 $dir = File::Spec->catpath(
726 $cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
f5afd28d
NC
727 );
728
e0d68803 729 ### catdir() returns undef if the path is longer than 255 chars on
2610e7a4 730 ### older VMS systems.
81a5970e
RGS
731 unless ( defined $dir ) {
732 $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
733 return;
734 }
735
39713df4
RGS
736 }
737
738 if( -e $dir && !-d _ ) {
739 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
740 return;
741 }
742
743 unless ( -d _ ) {
744 eval { File::Path::mkpath( $dir, 0, 0777 ) };
745 if( $@ ) {
642eb381
SH
746 my $fp = $entry->full_path;
747 $self->_error(qq[Could not create directory '$dir' for '$fp': $@]);
39713df4
RGS
748 return;
749 }
e0d68803 750
c3745331
RGS
751 ### XXX chown here? that might not be the same as in the archive
752 ### as we're only chown'ing to the owner of the file we're extracting
753 ### not to the owner of the directory itself, which may or may not
754 ### be another entry in the archive
755 ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
756 ### way to go.
757 #if( $CHOWN && CAN_CHOWN ) {
758 # chown $entry->uid, $entry->gid, $dir or
759 # $self->_error( qq[Could not set uid/gid on '$dir'] );
760 #}
39713df4
RGS
761 }
762
763 ### we're done if we just needed to create a dir ###
764 return 1 if $entry->is_dir;
765
766 my $full = File::Spec->catfile( $dir, $file );
767
768 if( $entry->is_unknown ) {
769 $self->_error( qq[Unknown file type for file '$full'] );
770 return;
771 }
772
773 if( length $entry->type && $entry->is_file ) {
774 my $fh = IO::File->new;
775 $fh->open( '>' . $full ) or (
776 $self->_error( qq[Could not open file '$full': $!] ),
777 return
778 );
779
780 if( $entry->size ) {
781 binmode $fh;
782 syswrite $fh, $entry->data or (
783 $self->_error( qq[Could not write data to '$full'] ),
784 return
785 );
786 }
787
788 close $fh or (
789 $self->_error( qq[Could not close file '$full'] ),
790 return
791 );
792
793 } else {
794 $self->_make_special_file( $entry, $full ) or return;
795 }
796
642eb381
SH
797 ### only update the timestamp if it's not a symlink; that will change the
798 ### timestamp of the original. This addresses bug #33669: Could not update
799 ### timestamp warning on symlinks
800 if( not -l $full ) {
801 utime time, $entry->mtime - TIME_OFFSET, $full or
802 $self->_error( qq[Could not update timestamp] );
803 }
39713df4 804
2610e7a4 805 if( $CHOWN && CAN_CHOWN->() ) {
39713df4
RGS
806 chown $entry->uid, $entry->gid, $full or
807 $self->_error( qq[Could not set uid/gid on '$full'] );
808 }
809
810 ### only chmod if we're allowed to, but never chmod symlinks, since they'll
811 ### change the perms on the file they're linking too...
812 if( $CHMOD and not -l $full ) {
1c82faa7
JB
813 my $mode = $entry->mode;
814 unless ($SAME_PERMISSIONS) {
815 $mode &= ~(oct(7000) | umask);
816 }
817 chmod $mode, $full or
39713df4
RGS
818 $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
819 }
820
821 return 1;
822}
823
824sub _make_special_file {
825 my $self = shift;
826 my $entry = shift or return;
827 my $file = shift; return unless defined $file;
828
829 my $err;
830
831 if( $entry->is_symlink ) {
832 my $fail;
833 if( ON_UNIX ) {
834 symlink( $entry->linkname, $file ) or $fail++;
835
836 } else {
837 $self->_extract_special_file_as_plain_file( $entry, $file )
838 or $fail++;
839 }
840
642eb381
SH
841 $err = qq[Making symbolic link '$file' to '] .
842 $entry->linkname .q[' failed] if $fail;
39713df4
RGS
843
844 } elsif ( $entry->is_hardlink ) {
845 my $fail;
846 if( ON_UNIX ) {
847 link( $entry->linkname, $file ) or $fail++;
848
849 } else {
850 $self->_extract_special_file_as_plain_file( $entry, $file )
851 or $fail++;
852 }
853
854 $err = qq[Making hard link from '] . $entry->linkname .
855 qq[' to '$file' failed] if $fail;
856
857 } elsif ( $entry->is_fifo ) {
858 ON_UNIX && !system('mknod', $file, 'p') or
859 $err = qq[Making fifo ']. $entry->name .qq[' failed];
860
861 } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
862 my $mode = $entry->is_blockdev ? 'b' : 'c';
863
864 ON_UNIX && !system('mknod', $file, $mode,
865 $entry->devmajor, $entry->devminor) or
866 $err = qq[Making block device ']. $entry->name .qq[' (maj=] .
867 $entry->devmajor . qq[ min=] . $entry->devminor .
868 qq[) failed.];
869
870 } elsif ( $entry->is_socket ) {
871 ### the original doesn't do anything special for sockets.... ###
872 1;
873 }
874
875 return $err ? $self->_error( $err ) : 1;
876}
877
878### don't know how to make symlinks, let's just extract the file as
879### a plain file
880sub _extract_special_file_as_plain_file {
881 my $self = shift;
882 my $entry = shift or return;
883 my $file = shift; return unless defined $file;
884
885 my $err;
886 TRY: {
887 my $orig = $self->_find_entry( $entry->linkname );
888
889 unless( $orig ) {
890 $err = qq[Could not find file '] . $entry->linkname .
891 qq[' in memory.];
892 last TRY;
893 }
894
895 ### clone the entry, make it appear as a normal file ###
896 my $clone = $entry->clone;
897 $clone->_downgrade_to_plainfile;
898 $self->_extract_file( $clone, $file ) or last TRY;
899
900 return 1;
901 }
902
903 return $self->_error($err);
904}
905
906=head2 $tar->list_files( [\@properties] )
907
908Returns a list of the names of all the files in the archive.
909
910If C<list_files()> is passed an array reference as its first argument
911it returns a list of hash references containing the requested
912properties of each file. The following list of properties is
913supported: name, size, mtime (last modified date), mode, uid, gid,
914linkname, uname, gname, devmajor, devminor, prefix.
915
916Passing an array reference containing only one element, 'name', is
917special cased to return a list of names rather than a list of hash
918references, making it equivalent to calling C<list_files> without
919arguments.
920
921=cut
922
923sub list_files {
924 my $self = shift;
925 my $aref = shift || [ ];
926
927 unless( $self->_data ) {
928 $self->read() or return;
929 }
930
931 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
932 return map { $_->full_path } @{$self->_data};
933 } else {
934
935 #my @rv;
936 #for my $obj ( @{$self->_data} ) {
937 # push @rv, { map { $_ => $obj->$_() } @$aref };
938 #}
939 #return @rv;
940
941 ### this does the same as the above.. just needs a +{ }
942 ### to make sure perl doesn't confuse it for a block
943 return map { my $o=$_;
944 +{ map { $_ => $o->$_() } @$aref }
945 } @{$self->_data};
946 }
947}
948
949sub _find_entry {
950 my $self = shift;
951 my $file = shift;
952
953 unless( defined $file ) {
954 $self->_error( qq[No file specified] );
955 return;
956 }
957
b30bcf62
RGS
958 ### it's an object already
959 return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
960
39713df4
RGS
961 for my $entry ( @{$self->_data} ) {
962 my $path = $entry->full_path;
963 return $entry if $path eq $file;
964 }
965
966 $self->_error( qq[No such file in archive: '$file'] );
967 return;
968}
969
970=head2 $tar->get_files( [@filenames] )
971
972Returns the C<Archive::Tar::File> objects matching the filenames
973provided. If no filename list was passed, all C<Archive::Tar::File>
974objects in the current Tar object are returned.
975
976Please refer to the C<Archive::Tar::File> documentation on how to
977handle these objects.
978
979=cut
980
981sub get_files {
982 my $self = shift;
983
984 return @{ $self->_data } unless @_;
985
986 my @list;
987 for my $file ( @_ ) {
988 push @list, grep { defined } $self->_find_entry( $file );
989 }
990
991 return @list;
992}
993
994=head2 $tar->get_content( $file )
995
996Return the content of the named file.
997
998=cut
999
1000sub get_content {
1001 my $self = shift;
1002 my $entry = $self->_find_entry( shift ) or return;
1003
1004 return $entry->data;
1005}
1006
1007=head2 $tar->replace_content( $file, $content )
1008
1009Make the string $content be the content for the file named $file.
1010
1011=cut
1012
1013sub replace_content {
1014 my $self = shift;
1015 my $entry = $self->_find_entry( shift ) or return;
1016
1017 return $entry->replace_content( shift );
1018}
1019
1020=head2 $tar->rename( $file, $new_name )
1021
1022Rename the file of the in-memory archive to $new_name.
1023
1024Note that you must specify a Unix path for $new_name, since per tar
1025standard, all files in the archive must be Unix paths.
1026
1027Returns true on success and false on failure.
1028
1029=cut
1030
1031sub rename {
1032 my $self = shift;
1033 my $file = shift; return unless defined $file;
1034 my $new = shift; return unless defined $new;
1035
1036 my $entry = $self->_find_entry( $file ) or return;
1037
1038 return $entry->rename( $new );
1039}
1040
1041=head2 $tar->remove (@filenamelist)
1042
1043Removes any entries with names matching any of the given filenames
1044from the in-memory archive. Returns a list of C<Archive::Tar::File>
1045objects that remain.
1046
1047=cut
1048
1049sub remove {
1050 my $self = shift;
1051 my @list = @_;
1052
1053 my %seen = map { $_->full_path => $_ } @{$self->_data};
1054 delete $seen{ $_ } for @list;
1055
1056 $self->_data( [values %seen] );
1057
1058 return values %seen;
1059}
1060
1061=head2 $tar->clear
1062
1063C<clear> clears the current in-memory archive. This effectively gives
1064you a 'blank' object, ready to be filled again. Note that C<clear>
1065only has effect on the object, not the underlying tarfile.
1066
1067=cut
1068
1069sub clear {
1070 my $self = shift or return;
1071
1072 $self->_data( [] );
1073 $self->_file( '' );
1074
1075 return 1;
1076}
1077
1078
1079=head2 $tar->write ( [$file, $compressed, $prefix] )
1080
1081Write the in-memory archive to disk. The first argument can either
1082be the name of a file or a reference to an already open filehandle (a
e0d68803 1083GLOB reference).
642eb381 1084
e0d68803 1085The second argument is used to indicate compression. You can either
642eb381 1086compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
e0d68803 1087to be the C<gzip> compression level (between 1 and 9), but the use of
642eb381
SH
1088constants is prefered:
1089
1090 # write a gzip compressed file
bef46b70 1091 $tar->write( 'out.tgz', COMPRESS_GZIP );
642eb381 1092
e0d68803 1093 # write a bzip compressed file
bef46b70 1094 $tar->write( 'out.tbz', COMPRESS_BZIP );
39713df4
RGS
1095
1096Note that when you pass in a filehandle, the compression argument
1097is ignored, as all files are printed verbatim to your filehandle.
1098If you wish to enable compression with filehandles, use an
642eb381 1099C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
39713df4
RGS
1100
1101The third argument is an optional prefix. All files will be tucked
1102away in the directory you specify as prefix. So if you have files
1103'a' and 'b' in your archive, and you specify 'foo' as prefix, they
1104will be written to the archive as 'foo/a' and 'foo/b'.
1105
1106If no arguments are given, C<write> returns the entire formatted
1107archive as a string, which could be useful if you'd like to stuff the
1108archive into a socket or a pipe to gzip or something.
1109
642eb381 1110
39713df4
RGS
1111=cut
1112
1113sub write {
1114 my $self = shift;
1115 my $file = shift; $file = '' unless defined $file;
1116 my $gzip = shift || 0;
1117 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
1118 my $dummy = '';
e0d68803 1119
39713df4
RGS
1120 ### only need a handle if we have a file to print to ###
1121 my $handle = length($file)
1122 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
1123 or return )
1124 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h }
e0d68803 1125 : $HAS_IO_STRING ? IO::String->new
39713df4
RGS
1126 : __PACKAGE__->no_string_support();
1127
e0d68803
JB
1128 ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a
1129 ### corrupt TAR file. Must clear out $\ to make sure no garbage is
1130 ### printed to the archive
1131 local $\;
39713df4
RGS
1132
1133 for my $entry ( @{$self->_data} ) {
1134 ### entries to be written to the tarfile ###
1135 my @write_me;
1136
1137 ### only now will we change the object to reflect the current state
1138 ### of the name and prefix fields -- this needs to be limited to
1139 ### write() only!
1140 my $clone = $entry->clone;
1141
1142
e0d68803 1143 ### so, if you don't want use to use the prefix, we'll stuff
39713df4
RGS
1144 ### everything in the name field instead
1145 if( $DO_NOT_USE_PREFIX ) {
1146
1147 ### you might have an extended prefix, if so, set it in the clone
1148 ### XXX is ::Unix right?
1149 $clone->name( length $ext_prefix
1150 ? File::Spec::Unix->catdir( $ext_prefix,
1151 $clone->full_path)
1152 : $clone->full_path );
1153 $clone->prefix( '' );
1154
1155 ### otherwise, we'll have to set it properly -- prefix part in the
1156 ### prefix and name part in the name field.
1157 } else {
1158
1159 ### split them here, not before!
1160 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
1161
1162 ### you might have an extended prefix, if so, set it in the clone
1163 ### XXX is ::Unix right?
1164 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
1165 if length $ext_prefix;
1166
1167 $clone->prefix( $prefix );
1168 $clone->name( $name );
1169 }
1170
1171 ### names are too long, and will get truncated if we don't add a
1172 ### '@LongLink' file...
1173 my $make_longlink = ( length($clone->name) > NAME_LENGTH or
1174 length($clone->prefix) > PREFIX_LENGTH
1175 ) || 0;
1176
1177 ### perhaps we need to make a longlink file?
1178 if( $make_longlink ) {
1179 my $longlink = Archive::Tar::File->new(
1180 data => LONGLINK_NAME,
1181 $clone->full_path,
1182 { type => LONGLINK }
1183 );
1184
1185 unless( $longlink ) {
1186 $self->_error( qq[Could not create 'LongLink' entry for ] .
1187 qq[oversize file '] . $clone->full_path ."'" );
1188 return;
1189 };
1190
1191 push @write_me, $longlink;
1192 }
1193
1194 push @write_me, $clone;
1195
1196 ### write the one, optionally 2 a::t::file objects to the handle
1197 for my $clone (@write_me) {
1198
1199 ### if the file is a symlink, there are 2 options:
1200 ### either we leave the symlink intact, but then we don't write any
1201 ### data OR we follow the symlink, which means we actually make a
1202 ### copy. if we do the latter, we have to change the TYPE of the
1203 ### clone to 'FILE'
1204 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1205 my $data_ok = !$clone->is_symlink && $clone->has_content;
1206
1207 ### downgrade to a 'normal' file if it's a symlink we're going to
1208 ### treat as a regular file
1209 $clone->_downgrade_to_plainfile if $link_ok;
1210
1211 ### get the header for this block
1212 my $header = $self->_format_tar_entry( $clone );
1213 unless( $header ) {
1214 $self->_error(q[Could not format header for: ] .
1215 $clone->full_path );
1216 return;
1217 }
1218
1219 unless( print $handle $header ) {
1220 $self->_error(q[Could not write header for: ] .
1221 $clone->full_path);
1222 return;
1223 }
1224
1225 if( $link_ok or $data_ok ) {
1226 unless( print $handle $clone->data ) {
1227 $self->_error(q[Could not write data for: ] .
1228 $clone->full_path);
1229 return;
1230 }
1231
1232 ### pad the end of the clone if required ###
1233 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1234 }
1235
1236 } ### done writing these entries
1237 }
1238
1239 ### write the end markers ###
1240 print $handle TAR_END x 2 or
1241 return $self->_error( qq[Could not write tar end markers] );
b30bcf62 1242
39713df4 1243 ### did you want it written to a file, or returned as a string? ###
b30bcf62 1244 my $rv = length($file) ? 1
39713df4 1245 : $HAS_PERLIO ? $dummy
b30bcf62
RGS
1246 : do { seek $handle, 0, 0; local $/; <$handle> };
1247
d33cd7cf
CBW
1248 ### make sure to close the handle if we created it
1249 close $handle unless ref($file);
e0d68803 1250
b30bcf62 1251 return $rv;
39713df4
RGS
1252}
1253
1254sub _format_tar_entry {
1255 my $self = shift;
1256 my $entry = shift or return;
1257 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
1258 my $no_prefix = shift || 0;
1259
1260 my $file = $entry->name;
1261 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix;
1262
1263 ### remove the prefix from the file name
1264 ### not sure if this is still neeeded --kane
1265 ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1266 ### this for us. Even worse, this would break if we tried to add a file
1267 ### like x/x.
1268 #if( length $prefix ) {
1269 # $file =~ s/^$match//;
1270 #}
1271
1272 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1273 if length $ext_prefix;
1274
1275 ### not sure why this is... ###
1276 my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1277 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1278
d33cd7cf 1279 my $f1 = "%06o"; my $f2 = $ZERO_PAD_NUMBERS ? "%011o" : "%11o";
39713df4
RGS
1280
1281 ### this might be optimizable with a 'changed' flag in the file objects ###
1282 my $tar = pack (
1283 PACK,
1284 $file,
1285
1286 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1287 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1288
1289 "", # checksum field - space padded a bit down
1290
1291 (map { $entry->$_() } qw[type linkname magic]),
1292
1293 $entry->version || TAR_VERSION,
1294
1295 (map { $entry->$_() } qw[uname gname]),
1296 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1297
1298 ($no_prefix ? '' : $prefix)
1299 );
1300
1301 ### add the checksum ###
d33cd7cf 1302 my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0";
39713df4
RGS
1303 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1304
1305 return $tar;
1306}
1307
1308=head2 $tar->add_files( @filenamelist )
1309
1310Takes a list of filenames and adds them to the in-memory archive.
1311
1312The path to the file is automatically converted to a Unix like
1313equivalent for use in the archive, and, if on MacOS, the file's
1314modification time is converted from the MacOS epoch to the Unix epoch.
1315So tar archives created on MacOS with B<Archive::Tar> can be read
1316both with I<tar> on Unix and applications like I<suntar> or
1317I<Stuffit Expander> on MacOS.
1318
1319Be aware that the file's type/creator and resource fork will be lost,
1320which is usually what you want in cross-platform archives.
1321
2610e7a4
JB
1322Instead of a filename, you can also pass it an existing C<Archive::Tar::File>
1323object from, for example, another archive. The object will be clone, and
1324effectively be a copy of the original, not an alias.
1325
39713df4
RGS
1326Returns a list of C<Archive::Tar::File> objects that were just added.
1327
1328=cut
1329
1330sub add_files {
1331 my $self = shift;
1332 my @files = @_ or return;
1333
1334 my @rv;
1335 for my $file ( @files ) {
2610e7a4
JB
1336
1337 ### you passed an Archive::Tar::File object
1338 ### clone it so we don't accidentally have a reference to
1339 ### an object from another archive
1340 if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) {
e0d68803 1341 push @rv, $file->clone;
2610e7a4
JB
1342 next;
1343 }
e0d68803 1344
c3745331 1345 unless( -e $file || -l $file ) {
39713df4
RGS
1346 $self->_error( qq[No such file: '$file'] );
1347 next;
1348 }
1349
1350 my $obj = Archive::Tar::File->new( file => $file );
1351 unless( $obj ) {
1352 $self->_error( qq[Unable to add file: '$file'] );
1353 next;
1354 }
1355
1356 push @rv, $obj;
1357 }
1358
1359 push @{$self->{_data}}, @rv;
1360
1361 return @rv;
1362}
1363
1364=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1365
1366Takes a filename, a scalar full of data and optionally a reference to
1367a hash with specific options.
1368
1369Will add a file to the in-memory archive, with name C<$filename> and
1370content C<$data>. Specific properties can be set using C<$opthashref>.
1371The following list of properties is supported: name, size, mtime
1372(last modified date), mode, uid, gid, linkname, uname, gname,
b3200c5d 1373devmajor, devminor, prefix, type. (On MacOS, the file's path and
39713df4
RGS
1374modification times are converted to Unix equivalents.)
1375
b3200c5d
SP
1376Valid values for the file type are the following constants defined in
1377Archive::Tar::Constants:
1378
1379=over 4
1380
1381=item FILE
1382
1383Regular file.
1384
1385=item HARDLINK
1386
1387=item SYMLINK
1388
1389Hard and symbolic ("soft") links; linkname should specify target.
1390
1391=item CHARDEV
1392
1393=item BLOCKDEV
1394
1395Character and block devices. devmajor and devminor should specify the major
1396and minor device numbers.
1397
1398=item DIR
1399
1400Directory.
1401
1402=item FIFO
1403
1404FIFO (named pipe).
1405
1406=item SOCKET
1407
1408Socket.
1409
1410=back
1411
39713df4
RGS
1412Returns the C<Archive::Tar::File> object that was just added, or
1413C<undef> on failure.
1414
1415=cut
1416
1417sub add_data {
1418 my $self = shift;
1419 my ($file, $data, $opt) = @_;
1420
1421 my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1422 unless( $obj ) {
1423 $self->_error( qq[Unable to add file: '$file'] );
1424 return;
1425 }
1426
1427 push @{$self->{_data}}, $obj;
1428
1429 return $obj;
1430}
1431
1432=head2 $tar->error( [$BOOL] )
1433
1434Returns the current errorstring (usually, the last error reported).
1435If a true value was specified, it will give the C<Carp::longmess>
1436equivalent of the error, in effect giving you a stacktrace.
1437
1438For backwards compatibility, this error is also available as
1439C<$Archive::Tar::error> although it is much recommended you use the
1440method call instead.
1441
1442=cut
1443
1444{
1445 $error = '';
1446 my $longmess;
1447
1448 sub _error {
1449 my $self = shift;
1450 my $msg = $error = shift;
1451 $longmess = Carp::longmess($error);
941cb2bb
CBW
1452 if (ref $self) {
1453 $self->{_error} = $error;
1454 $self->{_longmess} = $longmess;
1455 }
39713df4
RGS
1456
1457 ### set Archive::Tar::WARN to 0 to disable printing
1458 ### of errors
1459 if( $WARN ) {
1460 carp $DEBUG ? $longmess : $msg;
1461 }
1462
1463 return;
1464 }
1465
1466 sub error {
1467 my $self = shift;
941cb2bb
CBW
1468 if (ref $self) {
1469 return shift() ? $self->{_longmess} : $self->{_error};
1470 } else {
1471 return shift() ? $longmess : $error;
1472 }
39713df4
RGS
1473 }
1474}
1475
f38c1908
SP
1476=head2 $tar->setcwd( $cwd );
1477
1478C<Archive::Tar> needs to know the current directory, and it will run
e0d68803 1479C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
f38c1908 1480tarfile and saves it in the file system. (As of version 1.30, however,
e0d68803 1481C<Archive::Tar> will use the speed optimization described below
f38c1908
SP
1482automatically, so it's only relevant if you're using C<extract_file()>).
1483
1484Since C<Archive::Tar> doesn't change the current directory internally
1485while it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1486can be avoided if we can guarantee that the current directory doesn't
1487get changed externally.
1488
1489To use this performance boost, set the current directory via
1490
1491 use Cwd;
1492 $tar->setcwd( cwd() );
1493
1494once before calling a function like C<extract_file> and
1495C<Archive::Tar> will use the current directory setting from then on
e0d68803 1496and won't call C<Cwd::cwd()> internally.
f38c1908
SP
1497
1498To switch back to the default behaviour, use
1499
1500 $tar->setcwd( undef );
1501
1502and C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1503
1504If you're using C<Archive::Tar>'s C<exract()> method, C<setcwd()> will
1505be called for you.
1506
e0d68803 1507=cut
f38c1908
SP
1508
1509sub setcwd {
1510 my $self = shift;
1511 my $cwd = shift;
1512
1513 $self->{cwd} = $cwd;
1514}
39713df4 1515
39713df4
RGS
1516=head1 Class Methods
1517
642eb381 1518=head2 Archive::Tar->create_archive($file, $compressed, @filelist)
39713df4
RGS
1519
1520Creates a tar file from the list of files provided. The first
1521argument can either be the name of the tar file to create or a
1522reference to an open file handle (e.g. a GLOB reference).
1523
e0d68803 1524The second argument is used to indicate compression. You can either
642eb381 1525compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
e0d68803 1526to be the C<gzip> compression level (between 1 and 9), but the use of
642eb381
SH
1527constants is prefered:
1528
1529 # write a gzip compressed file
bef46b70 1530 Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist );
642eb381 1531
e0d68803 1532 # write a bzip compressed file
bef46b70 1533 Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist );
39713df4
RGS
1534
1535Note that when you pass in a filehandle, the compression argument
1536is ignored, as all files are printed verbatim to your filehandle.
1537If you wish to enable compression with filehandles, use an
642eb381 1538C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
39713df4
RGS
1539
1540The remaining arguments list the files to be included in the tar file.
1541These files must all exist. Any files which don't exist or can't be
1542read are silently ignored.
1543
1544If the archive creation fails for any reason, C<create_archive> will
1545return false. Please use the C<error> method to find the cause of the
1546failure.
1547
1548Note that this method does not write C<on the fly> as it were; it
1549still reads all the files into memory before writing out the archive.
1550Consult the FAQ below if this is a problem.
1551
1552=cut
1553
1554sub create_archive {
1555 my $class = shift;
1556
1557 my $file = shift; return unless defined $file;
1558 my $gzip = shift || 0;
1559 my @files = @_;
1560
1561 unless( @files ) {
1562 return $class->_error( qq[Cowardly refusing to create empty archive!] );
1563 }
1564
1565 my $tar = $class->new;
1566 $tar->add_files( @files );
1567 return $tar->write( $file, $gzip );
1568}
1569
642eb381
SH
1570=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] )
1571
1572Returns an iterator function that reads the tar file without loading
1573it all in memory. Each time the function is called it will return the
1574next file in the tarball. The files are returned as
1575C<Archive::Tar::File> objects. The iterator function returns the
941cb2bb 1576empty list once it has exhausted the files contained.
642eb381
SH
1577
1578The second argument can be a hash reference with options, which are
1579identical to the arguments passed to C<read()>.
1580
1581Example usage:
1582
1583 my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} );
1584
1585 while( my $f = $next->() ) {
1586 print $f->name, "\n";
1587
1588 $f->extract or warn "Extraction failed";
e0d68803 1589
642eb381
SH
1590 # ....
1591 }
1592
1593=cut
1594
1595
1596sub iter {
1597 my $class = shift;
1598 my $filename = shift or return;
1599 my $compressed = shift or 0;
1600 my $opts = shift || {};
1601
1602 ### get a handle to read from.
1603 my $handle = $class->_get_handle(
e0d68803
JB
1604 $filename,
1605 $compressed,
642eb381
SH
1606 READ_ONLY->( ZLIB )
1607 ) or return;
1608
1609 my @data;
1610 return sub {
1611 return shift(@data) if @data; # more than one file returned?
1612 return unless $handle; # handle exhausted?
1613
1614 ### read data, should only return file
941cb2bb
CBW
1615 my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 });
1616 @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY';
642eb381
SH
1617
1618 ### return one piece of data
1619 return shift(@data) if @data;
e0d68803 1620
642eb381
SH
1621 ### data is exhausted, free the filehandle
1622 undef $handle;
1623 return;
1624 };
1625}
1626
1627=head2 Archive::Tar->list_archive($file, $compressed, [\@properties])
39713df4
RGS
1628
1629Returns a list of the names of all the files in the archive. The
1630first argument can either be the name of the tar file to list or a
1631reference to an open file handle (e.g. a GLOB reference).
1632
1633If C<list_archive()> is passed an array reference as its third
1634argument it returns a list of hash references containing the requested
1635properties of each file. The following list of properties is
e0d68803 1636supported: full_path, name, size, mtime (last modified date), mode,
b3200c5d
SP
1637uid, gid, linkname, uname, gname, devmajor, devminor, prefix.
1638
1639See C<Archive::Tar::File> for details about supported properties.
39713df4
RGS
1640
1641Passing an array reference containing only one element, 'name', is
1642special cased to return a list of names rather than a list of hash
1643references.
1644
1645=cut
1646
1647sub list_archive {
1648 my $class = shift;
1649 my $file = shift; return unless defined $file;
1650 my $gzip = shift || 0;
1651
1652 my $tar = $class->new($file, $gzip);
1653 return unless $tar;
1654
1655 return $tar->list_files( @_ );
1656}
1657
642eb381 1658=head2 Archive::Tar->extract_archive($file, $compressed)
39713df4
RGS
1659
1660Extracts the contents of the tar file. The first argument can either
1661be the name of the tar file to create or a reference to an open file
1662handle (e.g. a GLOB reference). All relative paths in the tar file will
1663be created underneath the current working directory.
1664
1665C<extract_archive> will return a list of files it extracted.
1666If the archive extraction fails for any reason, C<extract_archive>
1667will return false. Please use the C<error> method to find the cause
1668of the failure.
1669
1670=cut
1671
1672sub extract_archive {
1673 my $class = shift;
1674 my $file = shift; return unless defined $file;
1675 my $gzip = shift || 0;
1676
1677 my $tar = $class->new( ) or return;
1678
1679 return $tar->read( $file, $gzip, { extract => 1 } );
1680}
1681
f5695358
JB
1682=head2 $bool = Archive::Tar->has_io_string
1683
1684Returns true if we currently have C<IO::String> support loaded.
1685
e0d68803 1686Either C<IO::String> or C<perlio> support is needed to support writing
f5695358
JB
1687stringified archives. Currently, C<perlio> is the preferred method, if
1688available.
1689
1690See the C<GLOBAL VARIABLES> section to see how to change this preference.
1691
1692=cut
1693
1694sub has_io_string { return $HAS_IO_STRING; }
1695
1696=head2 $bool = Archive::Tar->has_perlio
1697
1698Returns true if we currently have C<perlio> support loaded.
1699
e0d68803 1700This requires C<perl-5.8> or higher, compiled with C<perlio>
f5695358 1701
e0d68803 1702Either C<IO::String> or C<perlio> support is needed to support writing
f5695358
JB
1703stringified archives. Currently, C<perlio> is the preferred method, if
1704available.
1705
1706See the C<GLOBAL VARIABLES> section to see how to change this preference.
1707
1708=cut
1709
1710sub has_perlio { return $HAS_PERLIO; }
1711
1712=head2 $bool = Archive::Tar->has_zlib_support
1713
1714Returns true if C<Archive::Tar> can extract C<zlib> compressed archives
1715
1716=cut
1717
1718sub has_zlib_support { return ZLIB }
1719
1720=head2 $bool = Archive::Tar->has_bzip2_support
1721
1722Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives
1723
1724=cut
1725
1726sub has_bzip2_support { return BZIP }
1727
39713df4
RGS
1728=head2 Archive::Tar->can_handle_compressed_files
1729
1730A simple checking routine, which will return true if C<Archive::Tar>
642eb381
SH
1731is able to uncompress compressed archives on the fly with C<IO::Zlib>
1732and C<IO::Compress::Bzip2> or false if not both are installed.
39713df4
RGS
1733
1734You can use this as a shortcut to determine whether C<Archive::Tar>
1735will do what you think before passing compressed archives to its
1736C<read> method.
1737
1738=cut
1739
642eb381 1740sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 }
39713df4
RGS
1741
1742sub no_string_support {
1743 croak("You have to install IO::String to support writing archives to strings");
1744}
1745
17461;
1747
1748__END__
1749
1750=head1 GLOBAL VARIABLES
1751
1752=head2 $Archive::Tar::FOLLOW_SYMLINK
1753
1754Set this variable to C<1> to make C<Archive::Tar> effectively make a
1755copy of the file when extracting. Default is C<0>, which
1756means the symlink stays intact. Of course, you will have to pack the
1757file linked to as well.
1758
1759This option is checked when you write out the tarfile using C<write>
1760or C<create_archive>.
1761
1762This works just like C</bin/tar>'s C<-h> option.
1763
1764=head2 $Archive::Tar::CHOWN
1765
1766By default, C<Archive::Tar> will try to C<chown> your files if it is
1767able to. In some cases, this may not be desired. In that case, set
1768this variable to C<0> to disable C<chown>-ing, even if it were
1769possible.
1770
1771The default is C<1>.
1772
1773=head2 $Archive::Tar::CHMOD
1774
1775By default, C<Archive::Tar> will try to C<chmod> your files to
1776whatever mode was specified for the particular file in the archive.
1777In some cases, this may not be desired. In that case, set this
1778variable to C<0> to disable C<chmod>-ing.
1779
1780The default is C<1>.
1781
1c82faa7
JB
1782=head2 $Archive::Tar::SAME_PERMISSIONS
1783
1784When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether
1785the permissions on files from the archive are used without modification
1786of if they are filtered by removing any setid bits and applying the
1787current umask.
1788
1789The default is C<1> for the root user and C<0> for normal users.
1790
39713df4
RGS
1791=head2 $Archive::Tar::DO_NOT_USE_PREFIX
1792
e0d68803 1793By default, C<Archive::Tar> will try to put paths that are over
f38c1908 1794100 characters in the C<prefix> field of your tar header, as
e0d68803
JB
1795defined per POSIX-standard. However, some (older) tar programs
1796do not implement this spec. To retain compatibility with these older
1797or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
1798variable to a true value, and C<Archive::Tar> will use an alternate
1799way of dealing with paths over 100 characters by using the
f38c1908
SP
1800C<GNU Extended Header> feature.
1801
1802Note that clients who do not support the C<GNU Extended Header>
1803feature will not be able to read these archives. Such clients include
1804tars on C<Solaris>, C<Irix> and C<AIX>.
39713df4
RGS
1805
1806The default is C<0>.
1807
1808=head2 $Archive::Tar::DEBUG
1809
1810Set this variable to C<1> to always get the C<Carp::longmess> output
1811of the warnings, instead of the regular C<carp>. This is the same
1812message you would get by doing:
1813
1814 $tar->error(1);
1815
1816Defaults to C<0>.
1817
1818=head2 $Archive::Tar::WARN
1819
1820Set this variable to C<0> if you do not want any warnings printed.
1821Personally I recommend against doing this, but people asked for the
1822option. Also, be advised that this is of course not threadsafe.
1823
1824Defaults to C<1>.
1825
1826=head2 $Archive::Tar::error
1827
1828Holds the last reported error. Kept for historical reasons, but its
1829use is very much discouraged. Use the C<error()> method instead:
1830
1831 warn $tar->error unless $tar->extract;
1832
941cb2bb
CBW
1833Note that in older versions of this module, the C<error()> method
1834would return an effectively global value even when called an instance
1835method as above. This has since been fixed, and multiple instances of
1836C<Archive::Tar> now have separate error strings.
1837
178aef9a
RGS
1838=head2 $Archive::Tar::INSECURE_EXTRACT_MODE
1839
1840This variable indicates whether C<Archive::Tar> should allow
1841files to be extracted outside their current working directory.
1842
1843Allowing this could have security implications, as a malicious
1844tar archive could alter or replace any file the extracting user
e0d68803
JB
1845has permissions to. Therefor, the default is to not allow
1846insecure extractions.
178aef9a 1847
e0d68803
JB
1848If you trust the archive, or have other reasons to allow the
1849archive to write files outside your current working directory,
178aef9a
RGS
1850set this variable to C<true>.
1851
1852Note that this is a backwards incompatible change from version
1853C<1.36> and before.
1854
39713df4
RGS
1855=head2 $Archive::Tar::HAS_PERLIO
1856
e0d68803 1857This variable holds a boolean indicating if we currently have
39713df4 1858C<perlio> support loaded. This will be enabled for any perl
e0d68803 1859greater than C<5.8> compiled with C<perlio>.
39713df4
RGS
1860
1861If you feel strongly about disabling it, set this variable to
1862C<false>. Note that you will then need C<IO::String> installed
1863to support writing stringified archives.
1864
1865Don't change this variable unless you B<really> know what you're
1866doing.
1867
1868=head2 $Archive::Tar::HAS_IO_STRING
1869
e0d68803 1870This variable holds a boolean indicating if we currently have
39713df4
RGS
1871C<IO::String> support loaded. This will be enabled for any perl
1872that has a loadable C<IO::String> module.
1873
1874If you feel strongly about disabling it, set this variable to
1875C<false>. Note that you will then need C<perlio> support from
1876your perl to be able to write stringified archives.
1877
1878Don't change this variable unless you B<really> know what you're
1879doing.
1880
d33cd7cf
CBW
1881=head2 $Archive::Tar::ZERO_PAD_NUMBERS
1882
1883This variable holds a boolean indicating if we will create
1884zero padded numbers for C<size>, C<mtime> and C<checksum>.
1885The default is C<0>, indicating that we will create space padded
1886numbers. Added for compatibility with C<busybox> implementations.
1887
39713df4
RGS
1888=head1 FAQ
1889
1890=over 4
1891
1892=item What's the minimum perl version required to run Archive::Tar?
1893
1894You will need perl version 5.005_03 or newer.
1895
1896=item Isn't Archive::Tar slow?
1897
1898Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
1899However, it's very portable. If speed is an issue, consider using
1900C</bin/tar> instead.
1901
1902=item Isn't Archive::Tar heavier on memory than /bin/tar?
1903
1904Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
1905C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
1906choice but to read the archive into memory.
1907This is ok if you want to do in-memory manipulation of the archive.
642eb381 1908
39713df4
RGS
1909If you just want to extract, use the C<extract_archive> class method
1910instead. It will optimize and write to disk immediately.
1911
642eb381
SH
1912Another option is to use the C<iter> class method to iterate over
1913the files in the tarball without reading them all in memory at once.
1914
1915=item Can you lazy-load data instead?
39713df4 1916
642eb381
SH
1917In some cases, yes. You can use the C<iter> class method to iterate
1918over the files in the tarball without reading them all in memory at once.
39713df4
RGS
1919
1920=item How much memory will an X kb tar file need?
1921
1922Probably more than X kb, since it will all be read into memory. If
1923this is a problem, and you don't need to do in memory manipulation
e0d68803 1924of the archive, consider using the C<iter> class method, or C</bin/tar>
642eb381 1925instead.
39713df4
RGS
1926
1927=item What do you do with unsupported filetypes in an archive?
1928
1929C<Unix> has a few filetypes that aren't supported on other platforms,
1930like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
1931try to make a copy of the original file, rather than throwing an error.
1932
1933This does require you to read the entire archive in to memory first,
1934since otherwise we wouldn't know what data to fill the copy with.
e0d68803
JB
1935(This means that you cannot use the class methods, including C<iter>
1936on archives that have incompatible filetypes and still expect things
642eb381 1937to work).
39713df4
RGS
1938
1939For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
1940the extraction of this particular item didn't work.
1941
f38c1908
SP
1942=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
1943
1944By default, C<Archive::Tar> is in a completely POSIX-compatible
1945mode, which uses the POSIX-specification of C<tar> to store files.
1946For paths greather than 100 characters, this is done using the
1947C<POSIX header prefix>. Non-POSIX-compatible clients may not support
1948this part of the specification, and may only support the C<GNU Extended
1949Header> functionality. To facilitate those clients, you can set the
e0d68803 1950C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
f38c1908
SP
1951C<GLOBAL VARIABLES> section for details on this variable.
1952
c3745331
RGS
1953Note that GNU tar earlier than version 1.14 does not cope well with
1954the C<POSIX header prefix>. If you use such a version, consider setting
1955the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
1956
b30bcf62
RGS
1957=item How do I extract only files that have property X from an archive?
1958
1959Sometimes, you might not wish to extract a complete archive, just
1960the files that are relevant to you, based on some criteria.
1961
1962You can do this by filtering a list of C<Archive::Tar::File> objects
1963based on your criteria. For example, to extract only files that have
1964the string C<foo> in their title, you would use:
1965
e0d68803 1966 $tar->extract(
b30bcf62 1967 grep { $_->full_path =~ /foo/ } $tar->get_files
e0d68803 1968 );
b30bcf62
RGS
1969
1970This way, you can filter on any attribute of the files in the archive.
1971Consult the C<Archive::Tar::File> documentation on how to use these
1972objects.
1973
81a5970e
RGS
1974=item How do I access .tar.Z files?
1975
1976The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
1977the C<IO::Zlib> module) to access tar files that have been compressed
1978with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
1979utility cannot be read by C<Compress::Zlib> and so cannot be directly
1980accesses by C<Archive::Tar>.
1981
1982If the C<uncompress> or C<gunzip> programs are available, you can use
1983one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
1984
1985Firstly with C<uncompress>
1986
1987 use Archive::Tar;
1988
1989 open F, "uncompress -c $filename |";
1990 my $tar = Archive::Tar->new(*F);
1991 ...
1992
1993and this with C<gunzip>
1994
1995 use Archive::Tar;
1996
1997 open F, "gunzip -c $filename |";
1998 my $tar = Archive::Tar->new(*F);
1999 ...
2000
2001Similarly, if the C<compress> program is available, you can use this to
2002write a C<.tar.Z> file
2003
2004 use Archive::Tar;
2005 use IO::File;
2006
2007 my $fh = new IO::File "| compress -c >$filename";
2008 my $tar = Archive::Tar->new();
2009 ...
2010 $tar->write($fh);
2011 $fh->close ;
2012
01d11a1c
SP
2013=item How do I handle Unicode strings?
2014
2015C<Archive::Tar> uses byte semantics for any files it reads from or writes
2016to disk. This is not a problem if you only deal with files and never
2017look at their content or work solely with byte strings. But if you use
2018Unicode strings with character semantics, some additional steps need
2019to be taken.
2020
2021For example, if you add a Unicode string like
2022
2023 # Problem
2024 $tar->add_data('file.txt', "Euro: \x{20AC}");
2025
2026then there will be a problem later when the tarfile gets written out
2027to disk via C<$tar->write()>:
2028
2029 Wide character in print at .../Archive/Tar.pm line 1014.
2030
2031The data was added as a Unicode string and when writing it out to disk,
2032the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
2033tried to convert the string to ISO-8859 and failed. The written file
2034now contains garbage.
2035
2036For this reason, Unicode strings need to be converted to UTF-8-encoded
2037bytestrings before they are handed off to C<add_data()>:
2038
2039 use Encode;
2040 my $data = "Accented character: \x{20AC}";
2041 $data = encode('utf8', $data);
2042
2043 $tar->add_data('file.txt', $data);
2044
e0d68803 2045A opposite problem occurs if you extract a UTF8-encoded file from a
01d11a1c
SP
2046tarball. Using C<get_content()> on the C<Archive::Tar::File> object
2047will return its content as a bytestring, not as a Unicode string.
2048
2049If you want it to be a Unicode string (because you want character
2050semantics with operations like regular expression matching), you need
e0d68803 2051to decode the UTF8-encoded content and have Perl convert it into
01d11a1c
SP
2052a Unicode string:
2053
2054 use Encode;
2055 my $data = $tar->get_content();
e0d68803 2056
01d11a1c
SP
2057 # Make it a Unicode string
2058 $data = decode('utf8', $data);
2059
e0d68803 2060There is no easy way to provide this functionality in C<Archive::Tar>,
01d11a1c
SP
2061because a tarball can contain many files, and each of which could be
2062encoded in a different way.
81a5970e 2063
39713df4
RGS
2064=back
2065
f475b4a2
JB
2066=head1 CAVEATS
2067
2068The AIX tar does not fill all unused space in the tar archive with 0x00.
2069This sometimes leads to warning messages from C<Archive::Tar>.
2070
2071 Invalid header block at offset nnn
2072
2073A fix for that problem is scheduled to be released in the following levels
2074of AIX, all of which should be coming out in the 4th quarter of 2009:
2075
2076 AIX 5.3 TL7 SP10
2077 AIX 5.3 TL8 SP8
2078 AIX 5.3 TL9 SP5
2079 AIX 5.3 TL10 SP2
2080
2081 AIX 6.1 TL0 SP11
2082 AIX 6.1 TL1 SP7
2083 AIX 6.1 TL2 SP6
2084 AIX 6.1 TL3 SP3
2085
2086The IBM APAR number for this problem is IZ50240 (Reported component ID:
20875765G0300 / AIX 5.3). It is possible to get an ifix for that problem.
2088If you need an ifix please contact your local IBM AIX support.
2089
39713df4
RGS
2090=head1 TODO
2091
2092=over 4
2093
2094=item Check if passed in handles are open for read/write
2095
2096Currently I don't know of any portable pure perl way to do this.
2097Suggestions welcome.
2098
b3200c5d
SP
2099=item Allow archives to be passed in as string
2100
2101Currently, we only allow opened filehandles or filenames, but
2102not strings. The internals would need some reworking to facilitate
2103stringified archives.
2104
2105=item Facilitate processing an opened filehandle of a compressed archive
2106
2107Currently, we only support this if the filehandle is an IO::Zlib object.
2108Environments, like apache, will present you with an opened filehandle
2109to an uploaded file, which might be a compressed archive.
2110
39713df4
RGS
2111=back
2112
f38c1908
SP
2113=head1 SEE ALSO
2114
2115=over 4
2116
2117=item The GNU tar specification
2118
2119C<http://www.gnu.org/software/tar/manual/tar.html>
2120
2121=item The PAX format specication
2122
2123The specifcation which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
2124
2125=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>
2126
2127=item GNU tar intends to switch to POSIX compatibility
2128
2129GNU Tar authors have expressed their intention to become completely
2130POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html>
2131
2132=item A Comparison between various tar implementations
2133
2134Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs>
2135
2136=back
2137
39713df4
RGS
2138=head1 AUTHOR
2139
c3745331
RGS
2140This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
2141
2142Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
39713df4
RGS
2143
2144=head1 ACKNOWLEDGEMENTS
2145
f475b4a2
JB
2146Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas,
2147Rainer Tammer and especially Andrew Savige for their help and suggestions.
39713df4
RGS
2148
2149=head1 COPYRIGHT
2150
f475b4a2 2151This module is copyright (c) 2002 - 2009 Jos Boumans
c3745331 2152E<lt>kane@cpan.orgE<gt>. All rights reserved.
39713df4 2153
e0d68803 2154This library is free software; you may redistribute and/or modify
c3745331 2155it under the same terms as Perl itself.
39713df4
RGS
2156
2157=cut