This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Archive-Tar to CPAN version 1.82
[perl5.git] / cpan / Archive-Tar / lib / Archive / Tar.pm
CommitLineData
39713df4 1### the gnu tar specification:
f38c1908 2### http://www.gnu.org/software/tar/manual/tar.html
39713df4
RGS
3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
642eb381
SH
10use Cwd;
11use IO::Zlib;
12use IO::File;
13use Carp qw(carp croak);
14use File::Spec ();
15use File::Spec::Unix ();
16use File::Path ();
17
18use Archive::Tar::File;
19use Archive::Tar::Constant;
20
21require Exporter;
22
39713df4
RGS
23use strict;
24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
1c82faa7 25 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS
d33cd7cf 26 $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT
178aef9a
RGS
27 ];
28
642eb381 29@ISA = qw[Exporter];
bef46b70 30@EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP ];
178aef9a
RGS
31$DEBUG = 0;
32$WARN = 1;
33$FOLLOW_SYMLINK = 0;
f8c9502f 34$VERSION = "1.82";
178aef9a
RGS
35$CHOWN = 1;
36$CHMOD = 1;
1c82faa7 37$SAME_PERMISSIONS = $> == 0 ? 1 : 0;
178aef9a
RGS
38$DO_NOT_USE_PREFIX = 0;
39$INSECURE_EXTRACT_MODE = 0;
d33cd7cf 40$ZERO_PAD_NUMBERS = 0;
39713df4
RGS
41
42BEGIN {
43 use Config;
44 $HAS_PERLIO = $Config::Config{useperlio};
45
46 ### try and load IO::String anyway, so you can dynamically
47 ### switch between perlio and IO::String
642eb381 48 $HAS_IO_STRING = eval {
39713df4
RGS
49 require IO::String;
50 import IO::String;
642eb381
SH
51 1;
52 } || 0;
39713df4
RGS
53}
54
39713df4
RGS
55=head1 NAME
56
57Archive::Tar - module for manipulations of tar archives
58
59=head1 SYNOPSIS
60
61 use Archive::Tar;
62 my $tar = Archive::Tar->new;
63
642eb381 64 $tar->read('origin.tgz');
39713df4
RGS
65 $tar->extract();
66
67 $tar->add_files('file/foo.pl', 'docs/README');
68 $tar->add_data('file/baz.txt', 'This is the contents now');
69
70 $tar->rename('oldname', 'new/file/name');
93e94d8a
CBW
71 $tar->chown('/', 'root');
72 $tar->chown('/', 'root:root');
1e915955 73 $tar->chmod('/tmp', '1777');
39713df4 74
642eb381 75 $tar->write('files.tar'); # plain tar
bef46b70
JB
76 $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed
77 $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed
39713df4
RGS
78
79=head1 DESCRIPTION
80
81Archive::Tar provides an object oriented mechanism for handling tar
82files. It provides class methods for quick and easy files handling
83while also allowing for the creation of tar file objects for custom
84manipulation. If you have the IO::Zlib module installed,
85Archive::Tar will also support compressed or gzipped tar files.
86
87An object of class Archive::Tar represents a .tar(.gz) archive full
88of files and things.
89
90=head1 Object Methods
91
92=head2 Archive::Tar->new( [$file, $compressed] )
93
94Returns a new Tar object. If given any arguments, C<new()> calls the
95C<read()> method automatically, passing on the arguments provided to
96the C<read()> method.
97
98If C<new()> is invoked with arguments and the C<read()> method fails
99for any reason, C<new()> returns undef.
100
101=cut
102
103my $tmpl = {
104 _data => [ ],
105 _file => 'Unknown',
106};
107
108### install get/set accessors for this object.
109for my $key ( keys %$tmpl ) {
110 no strict 'refs';
111 *{__PACKAGE__."::$key"} = sub {
112 my $self = shift;
113 $self->{$key} = $_[0] if @_;
114 return $self->{$key};
115 }
116}
117
118sub new {
119 my $class = shift;
120 $class = ref $class if ref $class;
121
122 ### copying $tmpl here since a shallow copy makes it use the
123 ### same aref, causing for files to remain in memory always.
941cb2bb 124 my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class;
39713df4
RGS
125
126 if (@_) {
81a5970e
RGS
127 unless ( $obj->read( @_ ) ) {
128 $obj->_error(qq[No data could be read from file]);
129 return;
130 }
39713df4
RGS
131 }
132
133 return $obj;
134}
135
642eb381 136=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] )
39713df4
RGS
137
138Read the given tar file into memory.
139The first argument can either be the name of a file or a reference to
140an already open filehandle (or an IO::Zlib object if it's compressed)
39713df4
RGS
141
142The C<read> will I<replace> any previous content in C<$tar>!
143
e0d68803 144The second argument may be considered optional, but remains for
642eb381
SH
145backwards compatibility. Archive::Tar now looks at the file
146magic to determine what class should be used to open the file
147and will transparently Do The Right Thing.
148
149Archive::Tar will warn if you try to pass a bzip2 compressed file and the
150IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return.
39713df4 151
b3200c5d 152Note that you can currently B<not> pass a C<gzip> compressed
642eb381
SH
153filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed
154filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string
b3200c5d
SP
155containing the full archive information (either compressed or
156uncompressed). These are worth while features, but not currently
157implemented. See the C<TODO> section.
158
39713df4
RGS
159The third argument can be a hash reference with options. Note that
160all options are case-sensitive.
161
162=over 4
163
164=item limit
165
166Do not read more than C<limit> files. This is useful if you have
167very big archives, and are only interested in the first few files.
168
642eb381
SH
169=item filter
170
171Can be set to a regular expression. Only files with names that match
172the expression will be read.
173
f8c9502f
CBW
174=item md5
175
176Set to 1 and the md5sum of files will be returned (instead of file data)
177 my $iter = Archive::Tar->iter( $file, 1, {md5 => 1} );
178 while( my $f = $iter->() ) {
179 print $f->data . "\t" . $f->full_path . $/;
180 }
181
39713df4
RGS
182=item extract
183
184If set to true, immediately extract entries when reading them. This
185gives you the same memory break as the C<extract_archive> function.
186Note however that entries will not be read into memory, but written
e0d68803 187straight to disk. This means no C<Archive::Tar::File> objects are
642eb381 188created for you to inspect.
39713df4
RGS
189
190=back
191
192All files are stored internally as C<Archive::Tar::File> objects.
193Please consult the L<Archive::Tar::File> documentation for details.
194
195Returns the number of files read in scalar context, and a list of
196C<Archive::Tar::File> objects in list context.
197
198=cut
199
200sub read {
201 my $self = shift;
202 my $file = shift;
203 my $gzip = shift || 0;
204 my $opts = shift || {};
205
206 unless( defined $file ) {
207 $self->_error( qq[No file to read from!] );
208 return;
209 } else {
210 $self->_file( $file );
211 }
212
213 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
214 or return;
215
216 my $data = $self->_read_tar( $handle, $opts ) or return;
217
218 $self->_data( $data );
219
220 return wantarray ? @$data : scalar @$data;
221}
222
223sub _get_handle {
642eb381
SH
224 my $self = shift;
225 my $file = shift; return unless defined $file;
642eb381
SH
226 my $compress = shift || 0;
227 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only
228
afabe0e8
CBW
229 ### Check if file is a file handle or IO glob
230 if ( ref $file ) {
231 return $file if eval{ *$file{IO} };
232 return $file if eval{ $file->isa(q{IO::Handle}) };
233 $file = q{}.$file;
234 }
642eb381
SH
235
236 ### get a FH opened to the right class, so we can use it transparently
237 ### throughout the program
238 my $fh;
239 { ### reading magic only makes sense if we're opening a file for
240 ### reading. otherwise, just use what the user requested.
241 my $magic = '';
242 if( MODE_READ->($mode) ) {
243 open my $tmp, $file or do {
244 $self->_error( qq[Could not open '$file' for reading: $!] );
245 return;
246 };
e0d68803 247
642eb381
SH
248 ### read the first 4 bites of the file to figure out which class to
249 ### use to open the file.
e0d68803 250 sysread( $tmp, $magic, 4 );
642eb381
SH
251 close $tmp;
252 }
39713df4 253
642eb381
SH
254 ### is it bzip?
255 ### if you asked specifically for bzip compression, or if we're in
256 ### read mode and the magic numbers add up, use bzip
257 if( BZIP and (
e0d68803 258 ($compress eq COMPRESS_BZIP) or
642eb381
SH
259 ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM )
260 )
261 ) {
e0d68803 262
642eb381
SH
263 ### different reader/writer modules, different error vars... sigh
264 if( MODE_READ->($mode) ) {
265 $fh = IO::Uncompress::Bunzip2->new( $file ) or do {
266 $self->_error( qq[Could not read '$file': ] .
267 $IO::Uncompress::Bunzip2::Bunzip2Error
268 );
269 return;
270 };
e0d68803 271
642eb381
SH
272 } else {
273 $fh = IO::Compress::Bzip2->new( $file ) or do {
274 $self->_error( qq[Could not write to '$file': ] .
275 $IO::Compress::Bzip2::Bzip2Error
276 );
277 return;
278 };
279 }
e0d68803 280
642eb381
SH
281 ### is it gzip?
282 ### if you asked for compression, if you wanted to read or the gzip
283 ### magic number is present (redundant with read)
284 } elsif( ZLIB and (
285 $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM
e0d68803 286 )
642eb381
SH
287 ) {
288 $fh = IO::Zlib->new;
39713df4 289
642eb381
SH
290 unless( $fh->open( $file, $mode ) ) {
291 $self->_error(qq[Could not create filehandle for '$file': $!]);
292 return;
293 }
e0d68803 294
642eb381 295 ### is it plain tar?
39713df4 296 } else {
642eb381 297 $fh = IO::File->new;
39713df4 298
642eb381
SH
299 unless( $fh->open( $file, $mode ) ) {
300 $self->_error(qq[Could not create filehandle for '$file': $!]);
301 return;
302 }
39713df4 303
642eb381
SH
304 ### enable bin mode on tar archives
305 binmode $fh;
e0d68803 306 }
642eb381 307 }
39713df4
RGS
308
309 return $fh;
310}
311
642eb381 312
39713df4
RGS
313sub _read_tar {
314 my $self = shift;
315 my $handle = shift or return;
316 my $opts = shift || {};
317
318 my $count = $opts->{limit} || 0;
642eb381 319 my $filter = $opts->{filter};
f8c9502f 320 my $md5 = $opts->{md5} || 0; # cdrake
afabe0e8 321 my $filter_cb = $opts->{filter_cb};
39713df4
RGS
322 my $extract = $opts->{extract} || 0;
323
324 ### set a cap on the amount of files to extract ###
325 my $limit = 0;
326 $limit = 1 if $count > 0;
327
328 my $tarfile = [ ];
329 my $chunk;
330 my $read = 0;
331 my $real_name; # to set the name of a file when
332 # we're encountering @longlink
333 my $data;
334
335 LOOP:
336 while( $handle->read( $chunk, HEAD ) ) {
337 ### IO::Zlib doesn't support this yet
338 my $offset = eval { tell $handle } || 'unknown';
d33cd7cf 339 $@ = '';
39713df4
RGS
340
341 unless( $read++ ) {
342 my $gzip = GZIP_MAGIC_NUM;
343 if( $chunk =~ /$gzip/ ) {
344 $self->_error( qq[Cannot read compressed format in tar-mode] );
345 return;
346 }
93e94d8a 347
4feb3b72
JB
348 ### size is < HEAD, which means a corrupted file, as the minimum
349 ### length is _at least_ HEAD
350 if (length $chunk != HEAD) {
351 $self->_error( qq[Cannot read enough bytes from the tarfile] );
352 return;
353 }
39713df4
RGS
354 }
355
356 ### if we can't read in all bytes... ###
357 last if length $chunk != HEAD;
358
359 ### Apparently this should really be two blocks of 512 zeroes,
360 ### but GNU tar sometimes gets it wrong. See comment in the
361 ### source code (tar.c) to GNU cpio.
362 next if $chunk eq TAR_END;
363
b30bcf62
RGS
364 ### according to the posix spec, the last 12 bytes of the header are
365 ### null bytes, to pad it to a 512 byte block. That means if these
eadbb00b 366 ### bytes are NOT null bytes, it's a corrupt header. See:
b30bcf62
RGS
367 ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
368 ### line 111
369 { my $nulls = join '', "\0" x 12;
370 unless( $nulls eq substr( $chunk, 500, 12 ) ) {
371 $self->_error( qq[Invalid header block at offset $offset] );
372 next LOOP;
373 }
374 }
375
81a5970e
RGS
376 ### pass the realname, so we can set it 'proper' right away
377 ### some of the heuristics are done on the name, so important
378 ### to set it ASAP
39713df4 379 my $entry;
81a5970e
RGS
380 { my %extra_args = ();
381 $extra_args{'name'} = $$real_name if defined $real_name;
e0d68803
JB
382
383 unless( $entry = Archive::Tar::File->new( chunk => $chunk,
384 %extra_args )
81a5970e
RGS
385 ) {
386 $self->_error( qq[Couldn't read chunk at offset $offset] );
b30bcf62 387 next LOOP;
81a5970e 388 }
39713df4
RGS
389 }
390
391 ### ignore labels:
d33cd7cf 392 ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159
39713df4
RGS
393 next if $entry->is_label;
394
395 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
396
397 if ( $entry->is_file && !$entry->validate ) {
398 ### sometimes the chunk is rather fux0r3d and a whole 512
c3745331 399 ### bytes ends up in the ->name area.
39713df4
RGS
400 ### clean it up, if need be
401 my $name = $entry->name;
402 $name = substr($name, 0, 100) if length $name > 100;
403 $name =~ s/\n/ /g;
404
405 $self->_error( $name . qq[: checksum error] );
406 next LOOP;
407 }
408
409 my $block = BLOCK_SIZE->( $entry->size );
410
411 $data = $entry->get_content_by_ref;
412
afabe0e8 413 my $skip = 0;
f8c9502f 414 my $ctx; # cdrake
afabe0e8 415 ### skip this entry if we're filtering
f8c9502f
CBW
416
417 if($md5) { # cdrake
418 $ctx = Digest::MD5->new; # cdrake
419 $skip=5; # cdrake
420
421 } elsif ($filter && $entry->name !~ $filter) {
afabe0e8
CBW
422 $skip = 1;
423
424 ### skip this entry if it's a pax header. This is a special file added
425 ### by, among others, git-generated tarballs. It holds comments and is
426 ### not meant for extracting. See #38932: pax_global_header extracted
c3e6accb 427 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
afabe0e8
CBW
428 $skip = 2;
429 } elsif ($filter_cb && ! $filter_cb->($entry)) {
430 $skip = 3;
431 }
432
433 if ($skip) {
434 #
435 # Since we're skipping, do not allocate memory for the
93e94d8a 436 # whole file. Read it 64 BLOCKS at a time. Do not
afabe0e8
CBW
437 # complete the skip yet because maybe what we read is a
438 # longlink and it won't get skipped after all
439 #
440 my $amt = $block;
f8c9502f 441 my $fsz=$entry->size; # cdrake
afabe0e8
CBW
442 while ($amt > 0) {
443 $$data = '';
444 my $this = 64 * BLOCK;
445 $this = $amt if $this > $amt;
446 if( $handle->read( $$data, $this ) < $this ) {
447 $self->_error( qq[Read error on tarfile (missing data) '].
448 $entry->full_path ."' at offset $offset" );
449 next LOOP;
450 }
451 $amt -= $this;
f8c9502f
CBW
452 $fsz -= $this; # cdrake
453 substr ($$data, $fsz) = "" if ($fsz<0); # remove external junk prior to md5 # cdrake
454 $ctx->add($$data) if($skip==5); # cdrake
afabe0e8 455 }
f8c9502f 456 $$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ; # cdrake
afabe0e8
CBW
457 } else {
458
459 ### just read everything into memory
460 ### can't do lazy loading since IO::Zlib doesn't support 'seek'
461 ### this is because Compress::Zlib doesn't support it =/
462 ### this reads in the whole data in one read() call.
463 if ( $handle->read( $$data, $block ) < $block ) {
464 $self->_error( qq[Read error on tarfile (missing data) '].
39713df4 465 $entry->full_path ."' at offset $offset" );
afabe0e8
CBW
466 next LOOP;
467 }
468 ### throw away trailing garbage ###
469 substr ($$data, $entry->size) = "" if defined $$data;
39713df4
RGS
470 }
471
39713df4
RGS
472 ### part II of the @LongLink munging -- need to do /after/
473 ### the checksum check.
474 if( $entry->is_longlink ) {
475 ### weird thing in tarfiles -- if the file is actually a
476 ### @LongLink, the data part seems to have a trailing ^@
477 ### (unprintable) char. to display, pipe output through less.
478 ### but that doesn't *always* happen.. so check if the last
479 ### character is a control character, and if so remove it
480 ### at any rate, we better remove that character here, or tests
481 ### like 'eq' and hashlook ups based on names will SO not work
482 ### remove it by calculating the proper size, and then
483 ### tossing out everything that's longer than that size.
484
485 ### count number of nulls
486 my $nulls = $$data =~ tr/\0/\0/;
487
488 ### cut data + size by that many bytes
489 $entry->size( $entry->size - $nulls );
490 substr ($$data, $entry->size) = "";
491 }
492 }
493
494 ### clean up of the entries.. posix tar /apparently/ has some
495 ### weird 'feature' that allows for filenames > 255 characters
496 ### they'll put a header in with as name '././@LongLink' and the
497 ### contents will be the name of the /next/ file in the archive
498 ### pretty crappy and kludgy if you ask me
499
500 ### set the name for the next entry if this is a @LongLink;
501 ### this is one ugly hack =/ but needed for direct extraction
502 if( $entry->is_longlink ) {
503 $real_name = $data;
b30bcf62 504 next LOOP;
39713df4
RGS
505 } elsif ( defined $real_name ) {
506 $entry->name( $$real_name );
507 $entry->prefix('');
508 undef $real_name;
509 }
510
afabe0e8
CBW
511 if ($filter && $entry->name !~ $filter) {
512 next LOOP;
e0d68803 513
afabe0e8
CBW
514 ### skip this entry if it's a pax header. This is a special file added
515 ### by, among others, git-generated tarballs. It holds comments and is
516 ### not meant for extracting. See #38932: pax_global_header extracted
c3e6accb 517 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
afabe0e8
CBW
518 next LOOP;
519 } elsif ($filter_cb && ! $filter_cb->($entry)) {
520 next LOOP;
521 }
e0d68803 522
d33cd7cf
CBW
523 if ( $extract && !$entry->is_longlink
524 && !$entry->is_unknown
525 && !$entry->is_label ) {
526 $self->_extract_file( $entry ) or return;
527 }
39713df4
RGS
528
529 ### Guard against tarfiles with garbage at the end
530 last LOOP if $entry->name eq '';
531
532 ### push only the name on the rv if we're extracting
533 ### -- for extract_archive
534 push @$tarfile, ($extract ? $entry->name : $entry);
535
536 if( $limit ) {
537 $count-- unless $entry->is_longlink || $entry->is_dir;
538 last LOOP unless $count;
539 }
540 } continue {
541 undef $data;
542 }
543
544 return $tarfile;
545}
546
547=head2 $tar->contains_file( $filename )
548
549Check if the archive contains a certain file.
550It will return true if the file is in the archive, false otherwise.
551
552Note however, that this function does an exact match using C<eq>
553on the full path. So it cannot compensate for case-insensitive file-
554systems or compare 2 paths to see if they would point to the same
555underlying file.
556
557=cut
558
559sub contains_file {
560 my $self = shift;
01d11a1c 561 my $full = shift;
e0d68803 562
01d11a1c 563 return unless defined $full;
39713df4 564
c3745331
RGS
565 ### don't warn if the entry isn't there.. that's what this function
566 ### is for after all.
567 local $WARN = 0;
39713df4
RGS
568 return 1 if $self->_find_entry($full);
569 return;
570}
571
572=head2 $tar->extract( [@filenames] )
573
574Write files whose names are equivalent to any of the names in
575C<@filenames> to disk, creating subdirectories as necessary. This
576might not work too well under VMS.
577Under MacPerl, the file's modification time will be converted to the
578MacOS zero of time, and appropriate conversions will be done to the
579path. However, the length of each element of the path is not
580inspected to see whether it's longer than MacOS currently allows (32
581characters).
582
583If C<extract> is called without a list of file names, the entire
584contents of the archive are extracted.
585
586Returns a list of filenames extracted.
587
588=cut
589
590sub extract {
591 my $self = shift;
b30bcf62 592 my @args = @_;
39713df4
RGS
593 my @files;
594
f38c1908
SP
595 # use the speed optimization for all extracted files
596 local($self->{cwd}) = cwd() unless $self->{cwd};
597
eadbb00b 598 ### you requested the extraction of only certain files
b30bcf62
RGS
599 if( @args ) {
600 for my $file ( @args ) {
e0d68803 601
b30bcf62
RGS
602 ### it's already an object?
603 if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
604 push @files, $file;
605 next;
39713df4 606
b30bcf62
RGS
607 ### go find it then
608 } else {
e0d68803 609
b30bcf62
RGS
610 my $found;
611 for my $entry ( @{$self->_data} ) {
612 next unless $file eq $entry->full_path;
e0d68803 613
b30bcf62
RGS
614 ### we found the file you're looking for
615 push @files, $entry;
616 $found++;
617 }
e0d68803 618
b30bcf62 619 unless( $found ) {
e0d68803 620 return $self->_error(
b30bcf62
RGS
621 qq[Could not find '$file' in archive] );
622 }
39713df4
RGS
623 }
624 }
625
626 ### just grab all the file items
627 } else {
628 @files = $self->get_files;
629 }
630
631 ### nothing found? that's an error
632 unless( scalar @files ) {
633 $self->_error( qq[No files found for ] . $self->_file );
634 return;
635 }
636
637 ### now extract them
638 for my $entry ( @files ) {
639 unless( $self->_extract_file( $entry ) ) {
640 $self->_error(q[Could not extract ']. $entry->full_path .q['] );
641 return;
642 }
643 }
644
645 return @files;
646}
647
648=head2 $tar->extract_file( $file, [$extract_path] )
649
650Write an entry, whose name is equivalent to the file name provided to
48e76d2d 651disk. Optionally takes a second parameter, which is the full native
39713df4
RGS
652path (including filename) the entry will be written to.
653
654For example:
655
656 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
657
b30bcf62
RGS
658 $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' );
659
39713df4
RGS
660Returns true on success, false on failure.
661
662=cut
663
664sub extract_file {
665 my $self = shift;
01d11a1c 666 my $file = shift; return unless defined $file;
39713df4
RGS
667 my $alt = shift;
668
669 my $entry = $self->_find_entry( $file )
670 or $self->_error( qq[Could not find an entry for '$file'] ), return;
671
672 return $self->_extract_file( $entry, $alt );
673}
674
675sub _extract_file {
676 my $self = shift;
677 my $entry = shift or return;
678 my $alt = shift;
39713df4
RGS
679
680 ### you wanted an alternate extraction location ###
681 my $name = defined $alt ? $alt : $entry->full_path;
682
683 ### splitpath takes a bool at the end to indicate
684 ### that it's splitting a dir
7f10f74b
SH
685 my ($vol,$dirs,$file);
686 if ( defined $alt ) { # It's a local-OS path
687 ($vol,$dirs,$file) = File::Spec->splitpath( $alt,
688 $entry->is_dir );
689 } else {
690 ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
691 $entry->is_dir );
692 }
693
39713df4
RGS
694 my $dir;
695 ### is $name an absolute path? ###
642eb381 696 if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) {
178aef9a
RGS
697
698 ### absolute names are not allowed to be in tarballs under
699 ### strict mode, so only allow it if a user tells us to do it
700 if( not defined $alt and not $INSECURE_EXTRACT_MODE ) {
e0d68803 701 $self->_error(
178aef9a
RGS
702 q[Entry ']. $entry->full_path .q[' is an absolute path. ].
703 q[Not extracting absolute paths under SECURE EXTRACT MODE]
e0d68803 704 );
178aef9a
RGS
705 return;
706 }
e0d68803 707
178aef9a 708 ### user asked us to, it's fine.
642eb381 709 $dir = File::Spec->catpath( $vol, $dirs, "" );
39713df4
RGS
710
711 ### it's a relative path ###
712 } else {
e0d68803
JB
713 my $cwd = (ref $self and defined $self->{cwd})
714 ? $self->{cwd}
642eb381 715 : cwd();
f5afd28d 716
f5afd28d
NC
717 my @dirs = defined $alt
718 ? File::Spec->splitdir( $dirs ) # It's a local-OS path
719 : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely
720 # straight from the tarball
178aef9a 721
e0d68803
JB
722 if( not defined $alt and
723 not $INSECURE_EXTRACT_MODE
724 ) {
642eb381
SH
725
726 ### paths that leave the current directory are not allowed under
727 ### strict mode, so only allow it if a user tells us to do this.
728 if( grep { $_ eq '..' } @dirs ) {
e0d68803 729
642eb381
SH
730 $self->_error(
731 q[Entry ']. $entry->full_path .q[' is attempting to leave ].
732 q[the current working directory. Not extracting under ].
733 q[SECURE EXTRACT MODE]
734 );
735 return;
e0d68803
JB
736 }
737
642eb381
SH
738 ### the archive may be asking us to extract into a symlink. This
739 ### is not sane and a possible security issue, as outlined here:
740 ### https://rt.cpan.org/Ticket/Display.html?id=30380
741 ### https://bugzilla.redhat.com/show_bug.cgi?id=295021
742 ### https://issues.rpath.com/browse/RPL-1716
743 my $full_path = $cwd;
744 for my $d ( @dirs ) {
745 $full_path = File::Spec->catdir( $full_path, $d );
e0d68803 746
642eb381
SH
747 ### we've already checked this one, and it's safe. Move on.
748 next if ref $self and $self->{_link_cache}->{$full_path};
749
750 if( -l $full_path ) {
751 my $to = readlink $full_path;
752 my $diag = "symlinked directory ($full_path => $to)";
753
754 $self->_error(
755 q[Entry ']. $entry->full_path .q[' is attempting to ].
756 qq[extract to a $diag. This is considered a security ].
757 q[vulnerability and not allowed under SECURE EXTRACT ].
758 q[MODE]
759 );
760 return;
761 }
e0d68803 762
642eb381
SH
763 ### XXX keep a cache if possible, so the stats become cheaper:
764 $self->{_link_cache}->{$full_path} = 1 if ref $self;
765 }
766 }
767
2610e7a4
JB
768 ### '.' is the directory delimiter on VMS, which has to be escaped
769 ### or changed to '_' on vms. vmsify is used, because older versions
770 ### of vmspath do not handle this properly.
771 ### Must not add a '/' to an empty directory though.
e0d68803 772 map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS;
f5afd28d 773
e0d68803 774 my ($cwd_vol,$cwd_dir,$cwd_file)
48e76d2d
CB
775 = File::Spec->splitpath( $cwd );
776 my @cwd = File::Spec->splitdir( $cwd_dir );
777 push @cwd, $cwd_file if length $cwd_file;
81a5970e 778
eadbb00b 779 ### We need to pass '' as the last element to catpath. Craig Berry
f5afd28d 780 ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
e0d68803 781 ### The root problem is that splitpath on UNIX always returns the
f5afd28d
NC
782 ### final path element as a file even if it is a directory, and of
783 ### course there is no way it can know the difference without checking
784 ### against the filesystem, which it is documented as not doing. When
785 ### you turn around and call catpath, on VMS you have to know which bits
786 ### are directory bits and which bits are file bits. In this case we
787 ### know the result should be a directory. I had thought you could omit
788 ### the file argument to catpath in such a case, but apparently on UNIX
789 ### you can't.
e0d68803
JB
790 $dir = File::Spec->catpath(
791 $cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
f5afd28d
NC
792 );
793
e0d68803 794 ### catdir() returns undef if the path is longer than 255 chars on
2610e7a4 795 ### older VMS systems.
81a5970e
RGS
796 unless ( defined $dir ) {
797 $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
798 return;
799 }
800
39713df4
RGS
801 }
802
803 if( -e $dir && !-d _ ) {
804 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
805 return;
806 }
807
808 unless ( -d _ ) {
809 eval { File::Path::mkpath( $dir, 0, 0777 ) };
810 if( $@ ) {
642eb381
SH
811 my $fp = $entry->full_path;
812 $self->_error(qq[Could not create directory '$dir' for '$fp': $@]);
39713df4
RGS
813 return;
814 }
e0d68803 815
c3745331
RGS
816 ### XXX chown here? that might not be the same as in the archive
817 ### as we're only chown'ing to the owner of the file we're extracting
818 ### not to the owner of the directory itself, which may or may not
819 ### be another entry in the archive
820 ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
821 ### way to go.
822 #if( $CHOWN && CAN_CHOWN ) {
823 # chown $entry->uid, $entry->gid, $dir or
824 # $self->_error( qq[Could not set uid/gid on '$dir'] );
825 #}
39713df4
RGS
826 }
827
828 ### we're done if we just needed to create a dir ###
829 return 1 if $entry->is_dir;
830
831 my $full = File::Spec->catfile( $dir, $file );
832
833 if( $entry->is_unknown ) {
834 $self->_error( qq[Unknown file type for file '$full'] );
835 return;
836 }
837
838 if( length $entry->type && $entry->is_file ) {
839 my $fh = IO::File->new;
840 $fh->open( '>' . $full ) or (
841 $self->_error( qq[Could not open file '$full': $!] ),
842 return
843 );
844
845 if( $entry->size ) {
846 binmode $fh;
847 syswrite $fh, $entry->data or (
848 $self->_error( qq[Could not write data to '$full'] ),
849 return
850 );
851 }
852
853 close $fh or (
854 $self->_error( qq[Could not close file '$full'] ),
855 return
856 );
857
858 } else {
859 $self->_make_special_file( $entry, $full ) or return;
860 }
861
642eb381
SH
862 ### only update the timestamp if it's not a symlink; that will change the
863 ### timestamp of the original. This addresses bug #33669: Could not update
864 ### timestamp warning on symlinks
865 if( not -l $full ) {
866 utime time, $entry->mtime - TIME_OFFSET, $full or
867 $self->_error( qq[Could not update timestamp] );
868 }
39713df4 869
2610e7a4 870 if( $CHOWN && CAN_CHOWN->() ) {
39713df4
RGS
871 chown $entry->uid, $entry->gid, $full or
872 $self->_error( qq[Could not set uid/gid on '$full'] );
873 }
874
875 ### only chmod if we're allowed to, but never chmod symlinks, since they'll
876 ### change the perms on the file they're linking too...
877 if( $CHMOD and not -l $full ) {
1c82faa7
JB
878 my $mode = $entry->mode;
879 unless ($SAME_PERMISSIONS) {
880 $mode &= ~(oct(7000) | umask);
881 }
882 chmod $mode, $full or
39713df4
RGS
883 $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
884 }
885
886 return 1;
887}
888
889sub _make_special_file {
890 my $self = shift;
891 my $entry = shift or return;
892 my $file = shift; return unless defined $file;
893
894 my $err;
895
896 if( $entry->is_symlink ) {
897 my $fail;
898 if( ON_UNIX ) {
899 symlink( $entry->linkname, $file ) or $fail++;
900
901 } else {
902 $self->_extract_special_file_as_plain_file( $entry, $file )
903 or $fail++;
904 }
905
642eb381
SH
906 $err = qq[Making symbolic link '$file' to '] .
907 $entry->linkname .q[' failed] if $fail;
39713df4
RGS
908
909 } elsif ( $entry->is_hardlink ) {
910 my $fail;
911 if( ON_UNIX ) {
912 link( $entry->linkname, $file ) or $fail++;
913
914 } else {
915 $self->_extract_special_file_as_plain_file( $entry, $file )
916 or $fail++;
917 }
918
919 $err = qq[Making hard link from '] . $entry->linkname .
920 qq[' to '$file' failed] if $fail;
921
922 } elsif ( $entry->is_fifo ) {
923 ON_UNIX && !system('mknod', $file, 'p') or
924 $err = qq[Making fifo ']. $entry->name .qq[' failed];
925
926 } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
927 my $mode = $entry->is_blockdev ? 'b' : 'c';
928
929 ON_UNIX && !system('mknod', $file, $mode,
930 $entry->devmajor, $entry->devminor) or
931 $err = qq[Making block device ']. $entry->name .qq[' (maj=] .
932 $entry->devmajor . qq[ min=] . $entry->devminor .
933 qq[) failed.];
934
935 } elsif ( $entry->is_socket ) {
936 ### the original doesn't do anything special for sockets.... ###
937 1;
938 }
939
940 return $err ? $self->_error( $err ) : 1;
941}
942
943### don't know how to make symlinks, let's just extract the file as
944### a plain file
945sub _extract_special_file_as_plain_file {
946 my $self = shift;
947 my $entry = shift or return;
948 my $file = shift; return unless defined $file;
949
950 my $err;
951 TRY: {
952 my $orig = $self->_find_entry( $entry->linkname );
953
954 unless( $orig ) {
955 $err = qq[Could not find file '] . $entry->linkname .
956 qq[' in memory.];
957 last TRY;
958 }
959
960 ### clone the entry, make it appear as a normal file ###
961 my $clone = $entry->clone;
962 $clone->_downgrade_to_plainfile;
963 $self->_extract_file( $clone, $file ) or last TRY;
964
965 return 1;
966 }
967
968 return $self->_error($err);
969}
970
971=head2 $tar->list_files( [\@properties] )
972
973Returns a list of the names of all the files in the archive.
974
975If C<list_files()> is passed an array reference as its first argument
976it returns a list of hash references containing the requested
977properties of each file. The following list of properties is
978supported: name, size, mtime (last modified date), mode, uid, gid,
979linkname, uname, gname, devmajor, devminor, prefix.
980
981Passing an array reference containing only one element, 'name', is
982special cased to return a list of names rather than a list of hash
983references, making it equivalent to calling C<list_files> without
984arguments.
985
986=cut
987
988sub list_files {
989 my $self = shift;
990 my $aref = shift || [ ];
991
992 unless( $self->_data ) {
993 $self->read() or return;
994 }
995
996 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
997 return map { $_->full_path } @{$self->_data};
998 } else {
999
1000 #my @rv;
1001 #for my $obj ( @{$self->_data} ) {
1002 # push @rv, { map { $_ => $obj->$_() } @$aref };
1003 #}
1004 #return @rv;
1005
1006 ### this does the same as the above.. just needs a +{ }
1007 ### to make sure perl doesn't confuse it for a block
1008 return map { my $o=$_;
1009 +{ map { $_ => $o->$_() } @$aref }
1010 } @{$self->_data};
1011 }
1012}
1013
1014sub _find_entry {
1015 my $self = shift;
1016 my $file = shift;
1017
1018 unless( defined $file ) {
1019 $self->_error( qq[No file specified] );
1020 return;
1021 }
1022
b30bcf62
RGS
1023 ### it's an object already
1024 return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
1025
39713df4
RGS
1026 for my $entry ( @{$self->_data} ) {
1027 my $path = $entry->full_path;
1028 return $entry if $path eq $file;
1029 }
1030
1031 $self->_error( qq[No such file in archive: '$file'] );
1032 return;
1033}
1034
1035=head2 $tar->get_files( [@filenames] )
1036
1037Returns the C<Archive::Tar::File> objects matching the filenames
1038provided. If no filename list was passed, all C<Archive::Tar::File>
1039objects in the current Tar object are returned.
1040
1041Please refer to the C<Archive::Tar::File> documentation on how to
1042handle these objects.
1043
1044=cut
1045
1046sub get_files {
1047 my $self = shift;
1048
1049 return @{ $self->_data } unless @_;
1050
1051 my @list;
1052 for my $file ( @_ ) {
1053 push @list, grep { defined } $self->_find_entry( $file );
1054 }
1055
1056 return @list;
1057}
1058
1059=head2 $tar->get_content( $file )
1060
1061Return the content of the named file.
1062
1063=cut
1064
1065sub get_content {
1066 my $self = shift;
1067 my $entry = $self->_find_entry( shift ) or return;
1068
1069 return $entry->data;
1070}
1071
1072=head2 $tar->replace_content( $file, $content )
1073
1074Make the string $content be the content for the file named $file.
1075
1076=cut
1077
1078sub replace_content {
1079 my $self = shift;
1080 my $entry = $self->_find_entry( shift ) or return;
1081
1082 return $entry->replace_content( shift );
1083}
1084
1085=head2 $tar->rename( $file, $new_name )
1086
1087Rename the file of the in-memory archive to $new_name.
1088
1089Note that you must specify a Unix path for $new_name, since per tar
1090standard, all files in the archive must be Unix paths.
1091
1092Returns true on success and false on failure.
1093
1094=cut
1095
1096sub rename {
1097 my $self = shift;
1098 my $file = shift; return unless defined $file;
1099 my $new = shift; return unless defined $new;
1100
1101 my $entry = $self->_find_entry( $file ) or return;
1102
1103 return $entry->rename( $new );
1104}
1105
1e915955
CBW
1106=head2 $tar->chmod( $file, $mode )
1107
1108Change mode of $file to $mode.
1109
1110Returns true on success and false on failure.
1111
1112=cut
1113
1114sub chmod {
1115 my $self = shift;
1116 my $file = shift; return unless defined $file;
1117 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
1118 my @args = ("$mode");
1119
1120 my $entry = $self->_find_entry( $file ) or return;
1121 my $x = $entry->chmod( @args );
1122 return $x;
1123}
1124
93e94d8a
CBW
1125=head2 $tar->chown( $file, $uname [, $gname] )
1126
1127Change owner $file to $uname and $gname.
1128
1129Returns true on success and false on failure.
1130
1131=cut
1132
1133sub chown {
1134 my $self = shift;
1135 my $file = shift; return unless defined $file;
1136 my $uname = shift; return unless defined $uname;
1137 my @args = ($uname);
1138 push(@args, shift);
1139
1140 my $entry = $self->_find_entry( $file ) or return;
1141 my $x = $entry->chown( @args );
1142 return $x;
1143}
1144
39713df4
RGS
1145=head2 $tar->remove (@filenamelist)
1146
1147Removes any entries with names matching any of the given filenames
1148from the in-memory archive. Returns a list of C<Archive::Tar::File>
1149objects that remain.
1150
1151=cut
1152
1153sub remove {
1154 my $self = shift;
1155 my @list = @_;
1156
1157 my %seen = map { $_->full_path => $_ } @{$self->_data};
1158 delete $seen{ $_ } for @list;
1159
1160 $self->_data( [values %seen] );
1161
1162 return values %seen;
1163}
1164
1165=head2 $tar->clear
1166
1167C<clear> clears the current in-memory archive. This effectively gives
1168you a 'blank' object, ready to be filled again. Note that C<clear>
1169only has effect on the object, not the underlying tarfile.
1170
1171=cut
1172
1173sub clear {
1174 my $self = shift or return;
1175
1176 $self->_data( [] );
1177 $self->_file( '' );
1178
1179 return 1;
1180}
1181
1182
1183=head2 $tar->write ( [$file, $compressed, $prefix] )
1184
1185Write the in-memory archive to disk. The first argument can either
1186be the name of a file or a reference to an already open filehandle (a
e0d68803 1187GLOB reference).
642eb381 1188
e0d68803 1189The second argument is used to indicate compression. You can either
642eb381 1190compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
e0d68803 1191to be the C<gzip> compression level (between 1 and 9), but the use of
eadbb00b 1192constants is preferred:
642eb381
SH
1193
1194 # write a gzip compressed file
bef46b70 1195 $tar->write( 'out.tgz', COMPRESS_GZIP );
642eb381 1196
e0d68803 1197 # write a bzip compressed file
bef46b70 1198 $tar->write( 'out.tbz', COMPRESS_BZIP );
39713df4
RGS
1199
1200Note that when you pass in a filehandle, the compression argument
1201is ignored, as all files are printed verbatim to your filehandle.
1202If you wish to enable compression with filehandles, use an
642eb381 1203C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
39713df4
RGS
1204
1205The third argument is an optional prefix. All files will be tucked
1206away in the directory you specify as prefix. So if you have files
1207'a' and 'b' in your archive, and you specify 'foo' as prefix, they
1208will be written to the archive as 'foo/a' and 'foo/b'.
1209
1210If no arguments are given, C<write> returns the entire formatted
1211archive as a string, which could be useful if you'd like to stuff the
1212archive into a socket or a pipe to gzip or something.
1213
642eb381 1214
39713df4
RGS
1215=cut
1216
1217sub write {
1218 my $self = shift;
1219 my $file = shift; $file = '' unless defined $file;
1220 my $gzip = shift || 0;
1221 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
1222 my $dummy = '';
e0d68803 1223
39713df4
RGS
1224 ### only need a handle if we have a file to print to ###
1225 my $handle = length($file)
1226 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
1227 or return )
1228 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h }
e0d68803 1229 : $HAS_IO_STRING ? IO::String->new
39713df4
RGS
1230 : __PACKAGE__->no_string_support();
1231
e0d68803
JB
1232 ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a
1233 ### corrupt TAR file. Must clear out $\ to make sure no garbage is
1234 ### printed to the archive
1235 local $\;
39713df4
RGS
1236
1237 for my $entry ( @{$self->_data} ) {
1238 ### entries to be written to the tarfile ###
1239 my @write_me;
1240
1241 ### only now will we change the object to reflect the current state
1242 ### of the name and prefix fields -- this needs to be limited to
1243 ### write() only!
1244 my $clone = $entry->clone;
1245
1246
e0d68803 1247 ### so, if you don't want use to use the prefix, we'll stuff
39713df4
RGS
1248 ### everything in the name field instead
1249 if( $DO_NOT_USE_PREFIX ) {
1250
1251 ### you might have an extended prefix, if so, set it in the clone
1252 ### XXX is ::Unix right?
1253 $clone->name( length $ext_prefix
1254 ? File::Spec::Unix->catdir( $ext_prefix,
1255 $clone->full_path)
1256 : $clone->full_path );
1257 $clone->prefix( '' );
1258
1259 ### otherwise, we'll have to set it properly -- prefix part in the
1260 ### prefix and name part in the name field.
1261 } else {
1262
1263 ### split them here, not before!
1264 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
1265
1266 ### you might have an extended prefix, if so, set it in the clone
1267 ### XXX is ::Unix right?
1268 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
1269 if length $ext_prefix;
1270
1271 $clone->prefix( $prefix );
1272 $clone->name( $name );
1273 }
1274
1275 ### names are too long, and will get truncated if we don't add a
1276 ### '@LongLink' file...
1277 my $make_longlink = ( length($clone->name) > NAME_LENGTH or
1278 length($clone->prefix) > PREFIX_LENGTH
1279 ) || 0;
1280
1281 ### perhaps we need to make a longlink file?
1282 if( $make_longlink ) {
1283 my $longlink = Archive::Tar::File->new(
1284 data => LONGLINK_NAME,
1285 $clone->full_path,
1286 { type => LONGLINK }
1287 );
1288
1289 unless( $longlink ) {
1290 $self->_error( qq[Could not create 'LongLink' entry for ] .
1291 qq[oversize file '] . $clone->full_path ."'" );
1292 return;
1293 };
1294
1295 push @write_me, $longlink;
1296 }
1297
1298 push @write_me, $clone;
1299
1300 ### write the one, optionally 2 a::t::file objects to the handle
1301 for my $clone (@write_me) {
1302
1303 ### if the file is a symlink, there are 2 options:
1304 ### either we leave the symlink intact, but then we don't write any
1305 ### data OR we follow the symlink, which means we actually make a
1306 ### copy. if we do the latter, we have to change the TYPE of the
1307 ### clone to 'FILE'
1308 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1309 my $data_ok = !$clone->is_symlink && $clone->has_content;
1310
1311 ### downgrade to a 'normal' file if it's a symlink we're going to
1312 ### treat as a regular file
1313 $clone->_downgrade_to_plainfile if $link_ok;
1314
1315 ### get the header for this block
1316 my $header = $self->_format_tar_entry( $clone );
1317 unless( $header ) {
1318 $self->_error(q[Could not format header for: ] .
1319 $clone->full_path );
1320 return;
1321 }
1322
1323 unless( print $handle $header ) {
1324 $self->_error(q[Could not write header for: ] .
1325 $clone->full_path);
1326 return;
1327 }
1328
1329 if( $link_ok or $data_ok ) {
1330 unless( print $handle $clone->data ) {
1331 $self->_error(q[Could not write data for: ] .
1332 $clone->full_path);
1333 return;
1334 }
1335
1336 ### pad the end of the clone if required ###
1337 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1338 }
1339
1340 } ### done writing these entries
1341 }
1342
1343 ### write the end markers ###
1344 print $handle TAR_END x 2 or
1345 return $self->_error( qq[Could not write tar end markers] );
b30bcf62 1346
39713df4 1347 ### did you want it written to a file, or returned as a string? ###
b30bcf62 1348 my $rv = length($file) ? 1
39713df4 1349 : $HAS_PERLIO ? $dummy
b30bcf62
RGS
1350 : do { seek $handle, 0, 0; local $/; <$handle> };
1351
d33cd7cf 1352 ### make sure to close the handle if we created it
afabe0e8
CBW
1353 if ( $file ne $handle ) {
1354 unless( close $handle ) {
1355 $self->_error( qq[Could not write tar] );
1356 return;
1357 }
1358 }
e0d68803 1359
b30bcf62 1360 return $rv;
39713df4
RGS
1361}
1362
1363sub _format_tar_entry {
1364 my $self = shift;
1365 my $entry = shift or return;
1366 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
1367 my $no_prefix = shift || 0;
1368
1369 my $file = $entry->name;
1370 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix;
1371
1372 ### remove the prefix from the file name
eadbb00b 1373 ### not sure if this is still needed --kane
39713df4
RGS
1374 ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1375 ### this for us. Even worse, this would break if we tried to add a file
1376 ### like x/x.
1377 #if( length $prefix ) {
1378 # $file =~ s/^$match//;
1379 #}
1380
1381 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1382 if length $ext_prefix;
1383
1384 ### not sure why this is... ###
1385 my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1386 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1387
d33cd7cf 1388 my $f1 = "%06o"; my $f2 = $ZERO_PAD_NUMBERS ? "%011o" : "%11o";
39713df4
RGS
1389
1390 ### this might be optimizable with a 'changed' flag in the file objects ###
1391 my $tar = pack (
1392 PACK,
1393 $file,
1394
1395 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1396 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1397
1398 "", # checksum field - space padded a bit down
1399
1400 (map { $entry->$_() } qw[type linkname magic]),
1401
1402 $entry->version || TAR_VERSION,
1403
1404 (map { $entry->$_() } qw[uname gname]),
1405 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1406
1407 ($no_prefix ? '' : $prefix)
1408 );
1409
1410 ### add the checksum ###
d33cd7cf 1411 my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0";
39713df4
RGS
1412 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1413
1414 return $tar;
1415}
1416
1417=head2 $tar->add_files( @filenamelist )
1418
1419Takes a list of filenames and adds them to the in-memory archive.
1420
1421The path to the file is automatically converted to a Unix like
1422equivalent for use in the archive, and, if on MacOS, the file's
1423modification time is converted from the MacOS epoch to the Unix epoch.
1424So tar archives created on MacOS with B<Archive::Tar> can be read
1425both with I<tar> on Unix and applications like I<suntar> or
1426I<Stuffit Expander> on MacOS.
1427
1428Be aware that the file's type/creator and resource fork will be lost,
1429which is usually what you want in cross-platform archives.
1430
2610e7a4
JB
1431Instead of a filename, you can also pass it an existing C<Archive::Tar::File>
1432object from, for example, another archive. The object will be clone, and
1433effectively be a copy of the original, not an alias.
1434
39713df4
RGS
1435Returns a list of C<Archive::Tar::File> objects that were just added.
1436
1437=cut
1438
1439sub add_files {
1440 my $self = shift;
1441 my @files = @_ or return;
1442
1443 my @rv;
1444 for my $file ( @files ) {
2610e7a4
JB
1445
1446 ### you passed an Archive::Tar::File object
1447 ### clone it so we don't accidentally have a reference to
1448 ### an object from another archive
1449 if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) {
e0d68803 1450 push @rv, $file->clone;
2610e7a4
JB
1451 next;
1452 }
e0d68803 1453
c3745331 1454 unless( -e $file || -l $file ) {
39713df4
RGS
1455 $self->_error( qq[No such file: '$file'] );
1456 next;
1457 }
1458
1459 my $obj = Archive::Tar::File->new( file => $file );
1460 unless( $obj ) {
1461 $self->_error( qq[Unable to add file: '$file'] );
1462 next;
1463 }
1464
1465 push @rv, $obj;
1466 }
1467
1468 push @{$self->{_data}}, @rv;
1469
1470 return @rv;
1471}
1472
1473=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1474
1475Takes a filename, a scalar full of data and optionally a reference to
1476a hash with specific options.
1477
1478Will add a file to the in-memory archive, with name C<$filename> and
1479content C<$data>. Specific properties can be set using C<$opthashref>.
1480The following list of properties is supported: name, size, mtime
1481(last modified date), mode, uid, gid, linkname, uname, gname,
b3200c5d 1482devmajor, devminor, prefix, type. (On MacOS, the file's path and
39713df4
RGS
1483modification times are converted to Unix equivalents.)
1484
b3200c5d
SP
1485Valid values for the file type are the following constants defined in
1486Archive::Tar::Constants:
1487
1488=over 4
1489
1490=item FILE
1491
1492Regular file.
1493
1494=item HARDLINK
1495
1496=item SYMLINK
1497
1498Hard and symbolic ("soft") links; linkname should specify target.
1499
1500=item CHARDEV
1501
1502=item BLOCKDEV
1503
1504Character and block devices. devmajor and devminor should specify the major
1505and minor device numbers.
1506
1507=item DIR
1508
1509Directory.
1510
1511=item FIFO
1512
1513FIFO (named pipe).
1514
1515=item SOCKET
1516
1517Socket.
1518
1519=back
1520
39713df4
RGS
1521Returns the C<Archive::Tar::File> object that was just added, or
1522C<undef> on failure.
1523
1524=cut
1525
1526sub add_data {
1527 my $self = shift;
1528 my ($file, $data, $opt) = @_;
1529
1530 my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1531 unless( $obj ) {
1532 $self->_error( qq[Unable to add file: '$file'] );
1533 return;
1534 }
1535
1536 push @{$self->{_data}}, $obj;
1537
1538 return $obj;
1539}
1540
1541=head2 $tar->error( [$BOOL] )
1542
1543Returns the current errorstring (usually, the last error reported).
1544If a true value was specified, it will give the C<Carp::longmess>
1545equivalent of the error, in effect giving you a stacktrace.
1546
1547For backwards compatibility, this error is also available as
1548C<$Archive::Tar::error> although it is much recommended you use the
1549method call instead.
1550
1551=cut
1552
1553{
1554 $error = '';
1555 my $longmess;
1556
1557 sub _error {
1558 my $self = shift;
1559 my $msg = $error = shift;
1560 $longmess = Carp::longmess($error);
941cb2bb
CBW
1561 if (ref $self) {
1562 $self->{_error} = $error;
1563 $self->{_longmess} = $longmess;
1564 }
39713df4
RGS
1565
1566 ### set Archive::Tar::WARN to 0 to disable printing
1567 ### of errors
1568 if( $WARN ) {
1569 carp $DEBUG ? $longmess : $msg;
1570 }
1571
1572 return;
1573 }
1574
1575 sub error {
1576 my $self = shift;
941cb2bb
CBW
1577 if (ref $self) {
1578 return shift() ? $self->{_longmess} : $self->{_error};
1579 } else {
1580 return shift() ? $longmess : $error;
1581 }
39713df4
RGS
1582 }
1583}
1584
f38c1908
SP
1585=head2 $tar->setcwd( $cwd );
1586
1587C<Archive::Tar> needs to know the current directory, and it will run
e0d68803 1588C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
f38c1908 1589tarfile and saves it in the file system. (As of version 1.30, however,
e0d68803 1590C<Archive::Tar> will use the speed optimization described below
f38c1908
SP
1591automatically, so it's only relevant if you're using C<extract_file()>).
1592
1593Since C<Archive::Tar> doesn't change the current directory internally
1594while it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1595can be avoided if we can guarantee that the current directory doesn't
1596get changed externally.
1597
1598To use this performance boost, set the current directory via
1599
1600 use Cwd;
1601 $tar->setcwd( cwd() );
1602
1603once before calling a function like C<extract_file> and
1604C<Archive::Tar> will use the current directory setting from then on
e0d68803 1605and won't call C<Cwd::cwd()> internally.
f38c1908
SP
1606
1607To switch back to the default behaviour, use
1608
1609 $tar->setcwd( undef );
1610
1611and C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1612
eadbb00b 1613If you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will
f38c1908
SP
1614be called for you.
1615
e0d68803 1616=cut
f38c1908
SP
1617
1618sub setcwd {
1619 my $self = shift;
1620 my $cwd = shift;
1621
1622 $self->{cwd} = $cwd;
1623}
39713df4 1624
39713df4
RGS
1625=head1 Class Methods
1626
642eb381 1627=head2 Archive::Tar->create_archive($file, $compressed, @filelist)
39713df4
RGS
1628
1629Creates a tar file from the list of files provided. The first
1630argument can either be the name of the tar file to create or a
1631reference to an open file handle (e.g. a GLOB reference).
1632
e0d68803 1633The second argument is used to indicate compression. You can either
642eb381 1634compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
e0d68803 1635to be the C<gzip> compression level (between 1 and 9), but the use of
eadbb00b 1636constants is preferred:
642eb381
SH
1637
1638 # write a gzip compressed file
bef46b70 1639 Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist );
642eb381 1640
e0d68803 1641 # write a bzip compressed file
bef46b70 1642 Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist );
39713df4
RGS
1643
1644Note that when you pass in a filehandle, the compression argument
1645is ignored, as all files are printed verbatim to your filehandle.
1646If you wish to enable compression with filehandles, use an
642eb381 1647C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
39713df4
RGS
1648
1649The remaining arguments list the files to be included in the tar file.
1650These files must all exist. Any files which don't exist or can't be
1651read are silently ignored.
1652
1653If the archive creation fails for any reason, C<create_archive> will
1654return false. Please use the C<error> method to find the cause of the
1655failure.
1656
1657Note that this method does not write C<on the fly> as it were; it
1658still reads all the files into memory before writing out the archive.
1659Consult the FAQ below if this is a problem.
1660
1661=cut
1662
1663sub create_archive {
1664 my $class = shift;
1665
1666 my $file = shift; return unless defined $file;
1667 my $gzip = shift || 0;
1668 my @files = @_;
1669
1670 unless( @files ) {
1671 return $class->_error( qq[Cowardly refusing to create empty archive!] );
1672 }
1673
1674 my $tar = $class->new;
1675 $tar->add_files( @files );
1676 return $tar->write( $file, $gzip );
1677}
1678
642eb381
SH
1679=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] )
1680
1681Returns an iterator function that reads the tar file without loading
1682it all in memory. Each time the function is called it will return the
1683next file in the tarball. The files are returned as
1684C<Archive::Tar::File> objects. The iterator function returns the
941cb2bb 1685empty list once it has exhausted the files contained.
642eb381
SH
1686
1687The second argument can be a hash reference with options, which are
1688identical to the arguments passed to C<read()>.
1689
1690Example usage:
1691
1692 my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} );
1693
1694 while( my $f = $next->() ) {
1695 print $f->name, "\n";
1696
1697 $f->extract or warn "Extraction failed";
e0d68803 1698
642eb381
SH
1699 # ....
1700 }
1701
1702=cut
1703
1704
1705sub iter {
1706 my $class = shift;
1707 my $filename = shift or return;
93e94d8a 1708 my $compressed = shift || 0;
642eb381
SH
1709 my $opts = shift || {};
1710
1711 ### get a handle to read from.
1712 my $handle = $class->_get_handle(
e0d68803
JB
1713 $filename,
1714 $compressed,
642eb381
SH
1715 READ_ONLY->( ZLIB )
1716 ) or return;
1717
1718 my @data;
1719 return sub {
1720 return shift(@data) if @data; # more than one file returned?
1721 return unless $handle; # handle exhausted?
1722
1723 ### read data, should only return file
941cb2bb
CBW
1724 my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 });
1725 @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY';
642eb381
SH
1726
1727 ### return one piece of data
1728 return shift(@data) if @data;
e0d68803 1729
642eb381
SH
1730 ### data is exhausted, free the filehandle
1731 undef $handle;
1732 return;
1733 };
1734}
1735
1736=head2 Archive::Tar->list_archive($file, $compressed, [\@properties])
39713df4
RGS
1737
1738Returns a list of the names of all the files in the archive. The
1739first argument can either be the name of the tar file to list or a
1740reference to an open file handle (e.g. a GLOB reference).
1741
1742If C<list_archive()> is passed an array reference as its third
1743argument it returns a list of hash references containing the requested
1744properties of each file. The following list of properties is
e0d68803 1745supported: full_path, name, size, mtime (last modified date), mode,
b3200c5d
SP
1746uid, gid, linkname, uname, gname, devmajor, devminor, prefix.
1747
1748See C<Archive::Tar::File> for details about supported properties.
39713df4
RGS
1749
1750Passing an array reference containing only one element, 'name', is
1751special cased to return a list of names rather than a list of hash
1752references.
1753
1754=cut
1755
1756sub list_archive {
1757 my $class = shift;
1758 my $file = shift; return unless defined $file;
1759 my $gzip = shift || 0;
1760
1761 my $tar = $class->new($file, $gzip);
1762 return unless $tar;
1763
1764 return $tar->list_files( @_ );
1765}
1766
642eb381 1767=head2 Archive::Tar->extract_archive($file, $compressed)
39713df4
RGS
1768
1769Extracts the contents of the tar file. The first argument can either
1770be the name of the tar file to create or a reference to an open file
1771handle (e.g. a GLOB reference). All relative paths in the tar file will
1772be created underneath the current working directory.
1773
1774C<extract_archive> will return a list of files it extracted.
1775If the archive extraction fails for any reason, C<extract_archive>
1776will return false. Please use the C<error> method to find the cause
1777of the failure.
1778
1779=cut
1780
1781sub extract_archive {
1782 my $class = shift;
1783 my $file = shift; return unless defined $file;
1784 my $gzip = shift || 0;
1785
1786 my $tar = $class->new( ) or return;
1787
1788 return $tar->read( $file, $gzip, { extract => 1 } );
1789}
1790
f5695358
JB
1791=head2 $bool = Archive::Tar->has_io_string
1792
1793Returns true if we currently have C<IO::String> support loaded.
1794
e0d68803 1795Either C<IO::String> or C<perlio> support is needed to support writing
f5695358
JB
1796stringified archives. Currently, C<perlio> is the preferred method, if
1797available.
1798
1799See the C<GLOBAL VARIABLES> section to see how to change this preference.
1800
1801=cut
1802
1803sub has_io_string { return $HAS_IO_STRING; }
1804
1805=head2 $bool = Archive::Tar->has_perlio
1806
1807Returns true if we currently have C<perlio> support loaded.
1808
e0d68803 1809This requires C<perl-5.8> or higher, compiled with C<perlio>
f5695358 1810
e0d68803 1811Either C<IO::String> or C<perlio> support is needed to support writing
f5695358
JB
1812stringified archives. Currently, C<perlio> is the preferred method, if
1813available.
1814
1815See the C<GLOBAL VARIABLES> section to see how to change this preference.
1816
1817=cut
1818
1819sub has_perlio { return $HAS_PERLIO; }
1820
1821=head2 $bool = Archive::Tar->has_zlib_support
1822
1823Returns true if C<Archive::Tar> can extract C<zlib> compressed archives
1824
1825=cut
1826
1827sub has_zlib_support { return ZLIB }
1828
1829=head2 $bool = Archive::Tar->has_bzip2_support
1830
1831Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives
1832
1833=cut
1834
1835sub has_bzip2_support { return BZIP }
1836
39713df4
RGS
1837=head2 Archive::Tar->can_handle_compressed_files
1838
1839A simple checking routine, which will return true if C<Archive::Tar>
642eb381
SH
1840is able to uncompress compressed archives on the fly with C<IO::Zlib>
1841and C<IO::Compress::Bzip2> or false if not both are installed.
39713df4
RGS
1842
1843You can use this as a shortcut to determine whether C<Archive::Tar>
1844will do what you think before passing compressed archives to its
1845C<read> method.
1846
1847=cut
1848
642eb381 1849sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 }
39713df4
RGS
1850
1851sub no_string_support {
1852 croak("You have to install IO::String to support writing archives to strings");
1853}
1854
18551;
1856
1857__END__
1858
1859=head1 GLOBAL VARIABLES
1860
1861=head2 $Archive::Tar::FOLLOW_SYMLINK
1862
1863Set this variable to C<1> to make C<Archive::Tar> effectively make a
1864copy of the file when extracting. Default is C<0>, which
1865means the symlink stays intact. Of course, you will have to pack the
1866file linked to as well.
1867
1868This option is checked when you write out the tarfile using C<write>
1869or C<create_archive>.
1870
1871This works just like C</bin/tar>'s C<-h> option.
1872
1873=head2 $Archive::Tar::CHOWN
1874
1875By default, C<Archive::Tar> will try to C<chown> your files if it is
1876able to. In some cases, this may not be desired. In that case, set
1877this variable to C<0> to disable C<chown>-ing, even if it were
1878possible.
1879
1880The default is C<1>.
1881
1882=head2 $Archive::Tar::CHMOD
1883
1884By default, C<Archive::Tar> will try to C<chmod> your files to
1885whatever mode was specified for the particular file in the archive.
1886In some cases, this may not be desired. In that case, set this
1887variable to C<0> to disable C<chmod>-ing.
1888
1889The default is C<1>.
1890
1c82faa7
JB
1891=head2 $Archive::Tar::SAME_PERMISSIONS
1892
1893When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether
1894the permissions on files from the archive are used without modification
1895of if they are filtered by removing any setid bits and applying the
1896current umask.
1897
1898The default is C<1> for the root user and C<0> for normal users.
1899
39713df4
RGS
1900=head2 $Archive::Tar::DO_NOT_USE_PREFIX
1901
e0d68803 1902By default, C<Archive::Tar> will try to put paths that are over
f38c1908 1903100 characters in the C<prefix> field of your tar header, as
e0d68803
JB
1904defined per POSIX-standard. However, some (older) tar programs
1905do not implement this spec. To retain compatibility with these older
1906or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
1907variable to a true value, and C<Archive::Tar> will use an alternate
1908way of dealing with paths over 100 characters by using the
f38c1908
SP
1909C<GNU Extended Header> feature.
1910
1911Note that clients who do not support the C<GNU Extended Header>
1912feature will not be able to read these archives. Such clients include
1913tars on C<Solaris>, C<Irix> and C<AIX>.
39713df4
RGS
1914
1915The default is C<0>.
1916
1917=head2 $Archive::Tar::DEBUG
1918
1919Set this variable to C<1> to always get the C<Carp::longmess> output
1920of the warnings, instead of the regular C<carp>. This is the same
1921message you would get by doing:
1922
1923 $tar->error(1);
1924
1925Defaults to C<0>.
1926
1927=head2 $Archive::Tar::WARN
1928
1929Set this variable to C<0> if you do not want any warnings printed.
1930Personally I recommend against doing this, but people asked for the
1931option. Also, be advised that this is of course not threadsafe.
1932
1933Defaults to C<1>.
1934
1935=head2 $Archive::Tar::error
1936
1937Holds the last reported error. Kept for historical reasons, but its
1938use is very much discouraged. Use the C<error()> method instead:
1939
1940 warn $tar->error unless $tar->extract;
1941
941cb2bb
CBW
1942Note that in older versions of this module, the C<error()> method
1943would return an effectively global value even when called an instance
1944method as above. This has since been fixed, and multiple instances of
1945C<Archive::Tar> now have separate error strings.
1946
178aef9a
RGS
1947=head2 $Archive::Tar::INSECURE_EXTRACT_MODE
1948
1949This variable indicates whether C<Archive::Tar> should allow
1950files to be extracted outside their current working directory.
1951
1952Allowing this could have security implications, as a malicious
1953tar archive could alter or replace any file the extracting user
e0d68803
JB
1954has permissions to. Therefor, the default is to not allow
1955insecure extractions.
178aef9a 1956
e0d68803
JB
1957If you trust the archive, or have other reasons to allow the
1958archive to write files outside your current working directory,
178aef9a
RGS
1959set this variable to C<true>.
1960
1961Note that this is a backwards incompatible change from version
1962C<1.36> and before.
1963
39713df4
RGS
1964=head2 $Archive::Tar::HAS_PERLIO
1965
e0d68803 1966This variable holds a boolean indicating if we currently have
39713df4 1967C<perlio> support loaded. This will be enabled for any perl
e0d68803 1968greater than C<5.8> compiled with C<perlio>.
39713df4
RGS
1969
1970If you feel strongly about disabling it, set this variable to
1971C<false>. Note that you will then need C<IO::String> installed
1972to support writing stringified archives.
1973
1974Don't change this variable unless you B<really> know what you're
1975doing.
1976
1977=head2 $Archive::Tar::HAS_IO_STRING
1978
e0d68803 1979This variable holds a boolean indicating if we currently have
39713df4
RGS
1980C<IO::String> support loaded. This will be enabled for any perl
1981that has a loadable C<IO::String> module.
1982
1983If you feel strongly about disabling it, set this variable to
1984C<false>. Note that you will then need C<perlio> support from
1985your perl to be able to write stringified archives.
1986
1987Don't change this variable unless you B<really> know what you're
1988doing.
1989
d33cd7cf
CBW
1990=head2 $Archive::Tar::ZERO_PAD_NUMBERS
1991
1992This variable holds a boolean indicating if we will create
93e94d8a 1993zero padded numbers for C<size>, C<mtime> and C<checksum>.
d33cd7cf
CBW
1994The default is C<0>, indicating that we will create space padded
1995numbers. Added for compatibility with C<busybox> implementations.
1996
39713df4
RGS
1997=head1 FAQ
1998
1999=over 4
2000
2001=item What's the minimum perl version required to run Archive::Tar?
2002
2003You will need perl version 5.005_03 or newer.
2004
2005=item Isn't Archive::Tar slow?
2006
2007Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
2008However, it's very portable. If speed is an issue, consider using
2009C</bin/tar> instead.
2010
2011=item Isn't Archive::Tar heavier on memory than /bin/tar?
2012
2013Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
2014C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
2015choice but to read the archive into memory.
2016This is ok if you want to do in-memory manipulation of the archive.
642eb381 2017
39713df4
RGS
2018If you just want to extract, use the C<extract_archive> class method
2019instead. It will optimize and write to disk immediately.
2020
642eb381
SH
2021Another option is to use the C<iter> class method to iterate over
2022the files in the tarball without reading them all in memory at once.
2023
2024=item Can you lazy-load data instead?
39713df4 2025
642eb381
SH
2026In some cases, yes. You can use the C<iter> class method to iterate
2027over the files in the tarball without reading them all in memory at once.
39713df4
RGS
2028
2029=item How much memory will an X kb tar file need?
2030
2031Probably more than X kb, since it will all be read into memory. If
2032this is a problem, and you don't need to do in memory manipulation
e0d68803 2033of the archive, consider using the C<iter> class method, or C</bin/tar>
642eb381 2034instead.
39713df4
RGS
2035
2036=item What do you do with unsupported filetypes in an archive?
2037
2038C<Unix> has a few filetypes that aren't supported on other platforms,
2039like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
2040try to make a copy of the original file, rather than throwing an error.
2041
2042This does require you to read the entire archive in to memory first,
2043since otherwise we wouldn't know what data to fill the copy with.
e0d68803
JB
2044(This means that you cannot use the class methods, including C<iter>
2045on archives that have incompatible filetypes and still expect things
642eb381 2046to work).
39713df4
RGS
2047
2048For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
2049the extraction of this particular item didn't work.
2050
f38c1908
SP
2051=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
2052
2053By default, C<Archive::Tar> is in a completely POSIX-compatible
2054mode, which uses the POSIX-specification of C<tar> to store files.
eadbb00b 2055For paths greater than 100 characters, this is done using the
f38c1908
SP
2056C<POSIX header prefix>. Non-POSIX-compatible clients may not support
2057this part of the specification, and may only support the C<GNU Extended
2058Header> functionality. To facilitate those clients, you can set the
e0d68803 2059C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
f38c1908
SP
2060C<GLOBAL VARIABLES> section for details on this variable.
2061
c3745331
RGS
2062Note that GNU tar earlier than version 1.14 does not cope well with
2063the C<POSIX header prefix>. If you use such a version, consider setting
2064the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
2065
b30bcf62
RGS
2066=item How do I extract only files that have property X from an archive?
2067
2068Sometimes, you might not wish to extract a complete archive, just
2069the files that are relevant to you, based on some criteria.
2070
2071You can do this by filtering a list of C<Archive::Tar::File> objects
2072based on your criteria. For example, to extract only files that have
2073the string C<foo> in their title, you would use:
2074
e0d68803 2075 $tar->extract(
b30bcf62 2076 grep { $_->full_path =~ /foo/ } $tar->get_files
e0d68803 2077 );
b30bcf62
RGS
2078
2079This way, you can filter on any attribute of the files in the archive.
2080Consult the C<Archive::Tar::File> documentation on how to use these
2081objects.
2082
81a5970e
RGS
2083=item How do I access .tar.Z files?
2084
2085The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
2086the C<IO::Zlib> module) to access tar files that have been compressed
2087with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
2088utility cannot be read by C<Compress::Zlib> and so cannot be directly
2089accesses by C<Archive::Tar>.
2090
2091If the C<uncompress> or C<gunzip> programs are available, you can use
2092one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
2093
2094Firstly with C<uncompress>
2095
2096 use Archive::Tar;
2097
2098 open F, "uncompress -c $filename |";
2099 my $tar = Archive::Tar->new(*F);
2100 ...
2101
2102and this with C<gunzip>
2103
2104 use Archive::Tar;
2105
2106 open F, "gunzip -c $filename |";
2107 my $tar = Archive::Tar->new(*F);
2108 ...
2109
2110Similarly, if the C<compress> program is available, you can use this to
2111write a C<.tar.Z> file
2112
2113 use Archive::Tar;
2114 use IO::File;
2115
2116 my $fh = new IO::File "| compress -c >$filename";
2117 my $tar = Archive::Tar->new();
2118 ...
2119 $tar->write($fh);
2120 $fh->close ;
2121
01d11a1c
SP
2122=item How do I handle Unicode strings?
2123
2124C<Archive::Tar> uses byte semantics for any files it reads from or writes
2125to disk. This is not a problem if you only deal with files and never
2126look at their content or work solely with byte strings. But if you use
2127Unicode strings with character semantics, some additional steps need
2128to be taken.
2129
2130For example, if you add a Unicode string like
2131
2132 # Problem
2133 $tar->add_data('file.txt', "Euro: \x{20AC}");
2134
2135then there will be a problem later when the tarfile gets written out
2136to disk via C<$tar->write()>:
2137
2138 Wide character in print at .../Archive/Tar.pm line 1014.
2139
2140The data was added as a Unicode string and when writing it out to disk,
2141the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
2142tried to convert the string to ISO-8859 and failed. The written file
2143now contains garbage.
2144
2145For this reason, Unicode strings need to be converted to UTF-8-encoded
2146bytestrings before they are handed off to C<add_data()>:
2147
2148 use Encode;
2149 my $data = "Accented character: \x{20AC}";
2150 $data = encode('utf8', $data);
2151
2152 $tar->add_data('file.txt', $data);
2153
e0d68803 2154A opposite problem occurs if you extract a UTF8-encoded file from a
01d11a1c
SP
2155tarball. Using C<get_content()> on the C<Archive::Tar::File> object
2156will return its content as a bytestring, not as a Unicode string.
2157
2158If you want it to be a Unicode string (because you want character
2159semantics with operations like regular expression matching), you need
e0d68803 2160to decode the UTF8-encoded content and have Perl convert it into
01d11a1c
SP
2161a Unicode string:
2162
2163 use Encode;
2164 my $data = $tar->get_content();
e0d68803 2165
01d11a1c
SP
2166 # Make it a Unicode string
2167 $data = decode('utf8', $data);
2168
e0d68803 2169There is no easy way to provide this functionality in C<Archive::Tar>,
01d11a1c
SP
2170because a tarball can contain many files, and each of which could be
2171encoded in a different way.
81a5970e 2172
39713df4
RGS
2173=back
2174
f475b4a2
JB
2175=head1 CAVEATS
2176
93e94d8a 2177The AIX tar does not fill all unused space in the tar archive with 0x00.
f475b4a2
JB
2178This sometimes leads to warning messages from C<Archive::Tar>.
2179
2180 Invalid header block at offset nnn
2181
2182A fix for that problem is scheduled to be released in the following levels
2183of AIX, all of which should be coming out in the 4th quarter of 2009:
2184
2185 AIX 5.3 TL7 SP10
2186 AIX 5.3 TL8 SP8
2187 AIX 5.3 TL9 SP5
2188 AIX 5.3 TL10 SP2
93e94d8a 2189
f475b4a2
JB
2190 AIX 6.1 TL0 SP11
2191 AIX 6.1 TL1 SP7
2192 AIX 6.1 TL2 SP6
2193 AIX 6.1 TL3 SP3
2194
93e94d8a
CBW
2195The IBM APAR number for this problem is IZ50240 (Reported component ID:
21965765G0300 / AIX 5.3). It is possible to get an ifix for that problem.
f475b4a2
JB
2197If you need an ifix please contact your local IBM AIX support.
2198
39713df4
RGS
2199=head1 TODO
2200
2201=over 4
2202
2203=item Check if passed in handles are open for read/write
2204
2205Currently I don't know of any portable pure perl way to do this.
2206Suggestions welcome.
2207
b3200c5d
SP
2208=item Allow archives to be passed in as string
2209
2210Currently, we only allow opened filehandles or filenames, but
2211not strings. The internals would need some reworking to facilitate
2212stringified archives.
2213
2214=item Facilitate processing an opened filehandle of a compressed archive
2215
2216Currently, we only support this if the filehandle is an IO::Zlib object.
2217Environments, like apache, will present you with an opened filehandle
2218to an uploaded file, which might be a compressed archive.
2219
39713df4
RGS
2220=back
2221
f38c1908
SP
2222=head1 SEE ALSO
2223
2224=over 4
2225
2226=item The GNU tar specification
2227
2228C<http://www.gnu.org/software/tar/manual/tar.html>
2229
eadbb00b 2230=item The PAX format specification
f38c1908 2231
eadbb00b 2232The specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
f38c1908
SP
2233
2234=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>
2235
2236=item GNU tar intends to switch to POSIX compatibility
2237
2238GNU Tar authors have expressed their intention to become completely
2239POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html>
2240
2241=item A Comparison between various tar implementations
2242
2243Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs>
2244
2245=back
2246
39713df4
RGS
2247=head1 AUTHOR
2248
c3745331
RGS
2249This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
2250
2251Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
39713df4
RGS
2252
2253=head1 ACKNOWLEDGEMENTS
2254
f475b4a2
JB
2255Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas,
2256Rainer Tammer and especially Andrew Savige for their help and suggestions.
39713df4
RGS
2257
2258=head1 COPYRIGHT
2259
f475b4a2 2260This module is copyright (c) 2002 - 2009 Jos Boumans
c3745331 2261E<lt>kane@cpan.orgE<gt>. All rights reserved.
39713df4 2262
e0d68803 2263This library is free software; you may redistribute and/or modify
c3745331 2264it under the same terms as Perl itself.
39713df4
RGS
2265
2266=cut