This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
error reporting of [$a ; $b] can be a TODO.
[perl5.git] / lib / Archive / Tar.pm
CommitLineData
39713df4 1### the gnu tar specification:
f38c1908 2### http://www.gnu.org/software/tar/manual/tar.html
39713df4
RGS
3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
10use strict;
11use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
178aef9a
RGS
12 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING
13 $INSECURE_EXTRACT_MODE
14 ];
15
16$DEBUG = 0;
17$WARN = 1;
18$FOLLOW_SYMLINK = 0;
8d5f6fc7 19$VERSION = "1.38";
178aef9a
RGS
20$CHOWN = 1;
21$CHMOD = 1;
22$DO_NOT_USE_PREFIX = 0;
23$INSECURE_EXTRACT_MODE = 0;
39713df4
RGS
24
25BEGIN {
26 use Config;
27 $HAS_PERLIO = $Config::Config{useperlio};
28
29 ### try and load IO::String anyway, so you can dynamically
30 ### switch between perlio and IO::String
31 eval {
32 require IO::String;
33 import IO::String;
34 };
35 $HAS_IO_STRING = $@ ? 0 : 1;
36
37}
38
39use Cwd;
40use IO::File;
41use Carp qw(carp croak);
42use File::Spec ();
43use File::Spec::Unix ();
44use File::Path ();
45
46use Archive::Tar::File;
47use Archive::Tar::Constant;
48
49=head1 NAME
50
51Archive::Tar - module for manipulations of tar archives
52
53=head1 SYNOPSIS
54
55 use Archive::Tar;
56 my $tar = Archive::Tar->new;
57
58 $tar->read('origin.tgz',1);
59 $tar->extract();
60
61 $tar->add_files('file/foo.pl', 'docs/README');
62 $tar->add_data('file/baz.txt', 'This is the contents now');
63
64 $tar->rename('oldname', 'new/file/name');
65
66 $tar->write('files.tar');
67
68=head1 DESCRIPTION
69
70Archive::Tar provides an object oriented mechanism for handling tar
71files. It provides class methods for quick and easy files handling
72while also allowing for the creation of tar file objects for custom
73manipulation. If you have the IO::Zlib module installed,
74Archive::Tar will also support compressed or gzipped tar files.
75
76An object of class Archive::Tar represents a .tar(.gz) archive full
77of files and things.
78
79=head1 Object Methods
80
81=head2 Archive::Tar->new( [$file, $compressed] )
82
83Returns a new Tar object. If given any arguments, C<new()> calls the
84C<read()> method automatically, passing on the arguments provided to
85the C<read()> method.
86
87If C<new()> is invoked with arguments and the C<read()> method fails
88for any reason, C<new()> returns undef.
89
90=cut
91
92my $tmpl = {
93 _data => [ ],
94 _file => 'Unknown',
95};
96
97### install get/set accessors for this object.
98for my $key ( keys %$tmpl ) {
99 no strict 'refs';
100 *{__PACKAGE__."::$key"} = sub {
101 my $self = shift;
102 $self->{$key} = $_[0] if @_;
103 return $self->{$key};
104 }
105}
106
107sub new {
108 my $class = shift;
109 $class = ref $class if ref $class;
110
111 ### copying $tmpl here since a shallow copy makes it use the
112 ### same aref, causing for files to remain in memory always.
113 my $obj = bless { _data => [ ], _file => 'Unknown' }, $class;
114
115 if (@_) {
81a5970e
RGS
116 unless ( $obj->read( @_ ) ) {
117 $obj->_error(qq[No data could be read from file]);
118 return;
119 }
39713df4
RGS
120 }
121
122 return $obj;
123}
124
125=head2 $tar->read ( $filename|$handle, $compressed, {opt => 'val'} )
126
127Read the given tar file into memory.
128The first argument can either be the name of a file or a reference to
129an already open filehandle (or an IO::Zlib object if it's compressed)
130The second argument indicates whether the file referenced by the first
131argument is compressed.
132
133The C<read> will I<replace> any previous content in C<$tar>!
134
135The second argument may be considered optional if IO::Zlib is
136installed, since it will transparently Do The Right Thing.
137Archive::Tar will warn if you try to pass a compressed file if
138IO::Zlib is not available and simply return.
139
b3200c5d
SP
140Note that you can currently B<not> pass a C<gzip> compressed
141filehandle, which is not opened with C<IO::Zlib>, nor a string
142containing the full archive information (either compressed or
143uncompressed). These are worth while features, but not currently
144implemented. See the C<TODO> section.
145
39713df4
RGS
146The third argument can be a hash reference with options. Note that
147all options are case-sensitive.
148
149=over 4
150
151=item limit
152
153Do not read more than C<limit> files. This is useful if you have
154very big archives, and are only interested in the first few files.
155
156=item extract
157
158If set to true, immediately extract entries when reading them. This
159gives you the same memory break as the C<extract_archive> function.
160Note however that entries will not be read into memory, but written
161straight to disk.
162
163=back
164
165All files are stored internally as C<Archive::Tar::File> objects.
166Please consult the L<Archive::Tar::File> documentation for details.
167
168Returns the number of files read in scalar context, and a list of
169C<Archive::Tar::File> objects in list context.
170
171=cut
172
173sub read {
174 my $self = shift;
175 my $file = shift;
176 my $gzip = shift || 0;
177 my $opts = shift || {};
178
179 unless( defined $file ) {
180 $self->_error( qq[No file to read from!] );
181 return;
182 } else {
183 $self->_file( $file );
184 }
185
186 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
187 or return;
188
189 my $data = $self->_read_tar( $handle, $opts ) or return;
190
191 $self->_data( $data );
192
193 return wantarray ? @$data : scalar @$data;
194}
195
196sub _get_handle {
197 my $self = shift;
198 my $file = shift; return unless defined $file;
199 return $file if ref $file;
200
201 my $gzip = shift || 0;
202 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only
203
204 my $fh; my $bin;
205
206 ### only default to ZLIB if we're not trying to /write/ to a handle ###
207 if( ZLIB and $gzip || MODE_READ->( $mode ) ) {
208
209 ### IO::Zlib will Do The Right Thing, even when passed
210 ### a plain file ###
211 $fh = new IO::Zlib;
212
213 } else {
214 if( $gzip ) {
215 $self->_error(qq[Compression not available - Install IO::Zlib!]);
216 return;
217
218 } else {
219 $fh = new IO::File;
220 $bin++;
221 }
222 }
223
224 unless( $fh->open( $file, $mode ) ) {
225 $self->_error( qq[Could not create filehandle for '$file': $!!] );
226 return;
227 }
228
229 binmode $fh if $bin;
230
231 return $fh;
232}
233
234sub _read_tar {
235 my $self = shift;
236 my $handle = shift or return;
237 my $opts = shift || {};
238
239 my $count = $opts->{limit} || 0;
240 my $extract = $opts->{extract} || 0;
241
242 ### set a cap on the amount of files to extract ###
243 my $limit = 0;
244 $limit = 1 if $count > 0;
245
246 my $tarfile = [ ];
247 my $chunk;
248 my $read = 0;
249 my $real_name; # to set the name of a file when
250 # we're encountering @longlink
251 my $data;
252
253 LOOP:
254 while( $handle->read( $chunk, HEAD ) ) {
255 ### IO::Zlib doesn't support this yet
256 my $offset = eval { tell $handle } || 'unknown';
257
258 unless( $read++ ) {
259 my $gzip = GZIP_MAGIC_NUM;
260 if( $chunk =~ /$gzip/ ) {
261 $self->_error( qq[Cannot read compressed format in tar-mode] );
262 return;
263 }
264 }
265
266 ### if we can't read in all bytes... ###
267 last if length $chunk != HEAD;
268
269 ### Apparently this should really be two blocks of 512 zeroes,
270 ### but GNU tar sometimes gets it wrong. See comment in the
271 ### source code (tar.c) to GNU cpio.
272 next if $chunk eq TAR_END;
273
b30bcf62
RGS
274 ### according to the posix spec, the last 12 bytes of the header are
275 ### null bytes, to pad it to a 512 byte block. That means if these
276 ### bytes are NOT null bytes, it's a corrrupt header. See:
277 ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
278 ### line 111
279 { my $nulls = join '', "\0" x 12;
280 unless( $nulls eq substr( $chunk, 500, 12 ) ) {
281 $self->_error( qq[Invalid header block at offset $offset] );
282 next LOOP;
283 }
284 }
285
81a5970e
RGS
286 ### pass the realname, so we can set it 'proper' right away
287 ### some of the heuristics are done on the name, so important
288 ### to set it ASAP
39713df4 289 my $entry;
81a5970e
RGS
290 { my %extra_args = ();
291 $extra_args{'name'} = $$real_name if defined $real_name;
292
293 unless( $entry = Archive::Tar::File->new( chunk => $chunk,
294 %extra_args )
295 ) {
296 $self->_error( qq[Couldn't read chunk at offset $offset] );
b30bcf62 297 next LOOP;
81a5970e 298 }
39713df4
RGS
299 }
300
301 ### ignore labels:
302 ### http://www.gnu.org/manual/tar/html_node/tar_139.html
303 next if $entry->is_label;
304
305 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
306
307 if ( $entry->is_file && !$entry->validate ) {
308 ### sometimes the chunk is rather fux0r3d and a whole 512
c3745331 309 ### bytes ends up in the ->name area.
39713df4
RGS
310 ### clean it up, if need be
311 my $name = $entry->name;
312 $name = substr($name, 0, 100) if length $name > 100;
313 $name =~ s/\n/ /g;
314
315 $self->_error( $name . qq[: checksum error] );
316 next LOOP;
317 }
318
319 my $block = BLOCK_SIZE->( $entry->size );
320
321 $data = $entry->get_content_by_ref;
322
323 ### just read everything into memory
324 ### can't do lazy loading since IO::Zlib doesn't support 'seek'
325 ### this is because Compress::Zlib doesn't support it =/
326 ### this reads in the whole data in one read() call.
327 if( $handle->read( $$data, $block ) < $block ) {
328 $self->_error( qq[Read error on tarfile (missing data) '].
329 $entry->full_path ."' at offset $offset" );
b30bcf62 330 next LOOP;
39713df4
RGS
331 }
332
333 ### throw away trailing garbage ###
376cc5ea 334 substr ($$data, $entry->size) = "" if defined $$data;
39713df4
RGS
335
336 ### part II of the @LongLink munging -- need to do /after/
337 ### the checksum check.
338 if( $entry->is_longlink ) {
339 ### weird thing in tarfiles -- if the file is actually a
340 ### @LongLink, the data part seems to have a trailing ^@
341 ### (unprintable) char. to display, pipe output through less.
342 ### but that doesn't *always* happen.. so check if the last
343 ### character is a control character, and if so remove it
344 ### at any rate, we better remove that character here, or tests
345 ### like 'eq' and hashlook ups based on names will SO not work
346 ### remove it by calculating the proper size, and then
347 ### tossing out everything that's longer than that size.
348
349 ### count number of nulls
350 my $nulls = $$data =~ tr/\0/\0/;
351
352 ### cut data + size by that many bytes
353 $entry->size( $entry->size - $nulls );
354 substr ($$data, $entry->size) = "";
355 }
356 }
357
358 ### clean up of the entries.. posix tar /apparently/ has some
359 ### weird 'feature' that allows for filenames > 255 characters
360 ### they'll put a header in with as name '././@LongLink' and the
361 ### contents will be the name of the /next/ file in the archive
362 ### pretty crappy and kludgy if you ask me
363
364 ### set the name for the next entry if this is a @LongLink;
365 ### this is one ugly hack =/ but needed for direct extraction
366 if( $entry->is_longlink ) {
367 $real_name = $data;
b30bcf62 368 next LOOP;
39713df4
RGS
369 } elsif ( defined $real_name ) {
370 $entry->name( $$real_name );
371 $entry->prefix('');
372 undef $real_name;
373 }
374
375 $self->_extract_file( $entry ) if $extract
376 && !$entry->is_longlink
377 && !$entry->is_unknown
378 && !$entry->is_label;
379
380 ### Guard against tarfiles with garbage at the end
381 last LOOP if $entry->name eq '';
382
383 ### push only the name on the rv if we're extracting
384 ### -- for extract_archive
385 push @$tarfile, ($extract ? $entry->name : $entry);
386
387 if( $limit ) {
388 $count-- unless $entry->is_longlink || $entry->is_dir;
389 last LOOP unless $count;
390 }
391 } continue {
392 undef $data;
393 }
394
395 return $tarfile;
396}
397
398=head2 $tar->contains_file( $filename )
399
400Check if the archive contains a certain file.
401It will return true if the file is in the archive, false otherwise.
402
403Note however, that this function does an exact match using C<eq>
404on the full path. So it cannot compensate for case-insensitive file-
405systems or compare 2 paths to see if they would point to the same
406underlying file.
407
408=cut
409
410sub contains_file {
411 my $self = shift;
01d11a1c
SP
412 my $full = shift;
413
414 return unless defined $full;
39713df4 415
c3745331
RGS
416 ### don't warn if the entry isn't there.. that's what this function
417 ### is for after all.
418 local $WARN = 0;
39713df4
RGS
419 return 1 if $self->_find_entry($full);
420 return;
421}
422
423=head2 $tar->extract( [@filenames] )
424
425Write files whose names are equivalent to any of the names in
426C<@filenames> to disk, creating subdirectories as necessary. This
427might not work too well under VMS.
428Under MacPerl, the file's modification time will be converted to the
429MacOS zero of time, and appropriate conversions will be done to the
430path. However, the length of each element of the path is not
431inspected to see whether it's longer than MacOS currently allows (32
432characters).
433
434If C<extract> is called without a list of file names, the entire
435contents of the archive are extracted.
436
437Returns a list of filenames extracted.
438
439=cut
440
441sub extract {
442 my $self = shift;
b30bcf62 443 my @args = @_;
39713df4
RGS
444 my @files;
445
f38c1908
SP
446 # use the speed optimization for all extracted files
447 local($self->{cwd}) = cwd() unless $self->{cwd};
448
39713df4 449 ### you requested the extraction of only certian files
b30bcf62
RGS
450 if( @args ) {
451 for my $file ( @args ) {
452
453 ### it's already an object?
454 if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
455 push @files, $file;
456 next;
39713df4 457
b30bcf62
RGS
458 ### go find it then
459 } else {
460
461 my $found;
462 for my $entry ( @{$self->_data} ) {
463 next unless $file eq $entry->full_path;
464
465 ### we found the file you're looking for
466 push @files, $entry;
467 $found++;
468 }
469
470 unless( $found ) {
471 return $self->_error(
472 qq[Could not find '$file' in archive] );
473 }
39713df4
RGS
474 }
475 }
476
477 ### just grab all the file items
478 } else {
479 @files = $self->get_files;
480 }
481
482 ### nothing found? that's an error
483 unless( scalar @files ) {
484 $self->_error( qq[No files found for ] . $self->_file );
485 return;
486 }
487
488 ### now extract them
489 for my $entry ( @files ) {
490 unless( $self->_extract_file( $entry ) ) {
491 $self->_error(q[Could not extract ']. $entry->full_path .q['] );
492 return;
493 }
494 }
495
496 return @files;
497}
498
499=head2 $tar->extract_file( $file, [$extract_path] )
500
501Write an entry, whose name is equivalent to the file name provided to
48e76d2d 502disk. Optionally takes a second parameter, which is the full native
39713df4
RGS
503path (including filename) the entry will be written to.
504
505For example:
506
507 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
508
b30bcf62
RGS
509 $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' );
510
39713df4
RGS
511Returns true on success, false on failure.
512
513=cut
514
515sub extract_file {
516 my $self = shift;
01d11a1c 517 my $file = shift; return unless defined $file;
39713df4
RGS
518 my $alt = shift;
519
520 my $entry = $self->_find_entry( $file )
521 or $self->_error( qq[Could not find an entry for '$file'] ), return;
522
523 return $self->_extract_file( $entry, $alt );
524}
525
526sub _extract_file {
527 my $self = shift;
528 my $entry = shift or return;
529 my $alt = shift;
39713df4
RGS
530
531 ### you wanted an alternate extraction location ###
532 my $name = defined $alt ? $alt : $entry->full_path;
533
534 ### splitpath takes a bool at the end to indicate
535 ### that it's splitting a dir
7f10f74b
SH
536 my ($vol,$dirs,$file);
537 if ( defined $alt ) { # It's a local-OS path
538 ($vol,$dirs,$file) = File::Spec->splitpath( $alt,
539 $entry->is_dir );
540 } else {
541 ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
542 $entry->is_dir );
543 }
544
39713df4
RGS
545 my $dir;
546 ### is $name an absolute path? ###
547 if( File::Spec->file_name_is_absolute( $dirs ) ) {
178aef9a
RGS
548
549 ### absolute names are not allowed to be in tarballs under
550 ### strict mode, so only allow it if a user tells us to do it
551 if( not defined $alt and not $INSECURE_EXTRACT_MODE ) {
552 $self->_error(
553 q[Entry ']. $entry->full_path .q[' is an absolute path. ].
554 q[Not extracting absolute paths under SECURE EXTRACT MODE]
555 );
556 return;
557 }
558
559 ### user asked us to, it's fine.
39713df4
RGS
560 $dir = $dirs;
561
562 ### it's a relative path ###
563 } else {
f38c1908 564 my $cwd = (defined $self->{cwd} ? $self->{cwd} : cwd());
f5afd28d 565
f5afd28d
NC
566 my @dirs = defined $alt
567 ? File::Spec->splitdir( $dirs ) # It's a local-OS path
568 : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely
569 # straight from the tarball
178aef9a
RGS
570
571 ### paths that leave the current directory are not allowed under
572 ### strict mode, so only allow it if a user tells us to do this.
573 if( not defined $alt and
574 not $INSECURE_EXTRACT_MODE and
575 grep { $_ eq '..' } @dirs
576 ) {
577 $self->_error(
578 q[Entry ']. $entry->full_path .q[' is attempting to leave the ].
579 q[current working directory. Not extracting under SECURE ].
580 q[EXTRACT MODE]
581 );
582 return;
583 }
f5afd28d
NC
584
585 ### '.' is the directory delimiter, of which the first one has to
586 ### be escaped/changed.
587 map tr/\./_/, @dirs if ON_VMS;
588
48e76d2d
CB
589 my ($cwd_vol,$cwd_dir,$cwd_file)
590 = File::Spec->splitpath( $cwd );
591 my @cwd = File::Spec->splitdir( $cwd_dir );
592 push @cwd, $cwd_file if length $cwd_file;
81a5970e 593
f5afd28d
NC
594 ### We need to pass '' as the last elemant to catpath. Craig Berry
595 ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
596 ### The root problem is that splitpath on UNIX always returns the
597 ### final path element as a file even if it is a directory, and of
598 ### course there is no way it can know the difference without checking
599 ### against the filesystem, which it is documented as not doing. When
600 ### you turn around and call catpath, on VMS you have to know which bits
601 ### are directory bits and which bits are file bits. In this case we
602 ### know the result should be a directory. I had thought you could omit
603 ### the file argument to catpath in such a case, but apparently on UNIX
604 ### you can't.
605 $dir = File::Spec->catpath(
606 $cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
607 );
608
609 ### catdir() returns undef if the path is longer than 255 chars on VMS
81a5970e
RGS
610 unless ( defined $dir ) {
611 $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
612 return;
613 }
614
39713df4
RGS
615 }
616
617 if( -e $dir && !-d _ ) {
618 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
619 return;
620 }
621
622 unless ( -d _ ) {
623 eval { File::Path::mkpath( $dir, 0, 0777 ) };
624 if( $@ ) {
625 $self->_error( qq[Could not create directory '$dir': $@] );
626 return;
627 }
c3745331
RGS
628
629 ### XXX chown here? that might not be the same as in the archive
630 ### as we're only chown'ing to the owner of the file we're extracting
631 ### not to the owner of the directory itself, which may or may not
632 ### be another entry in the archive
633 ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
634 ### way to go.
635 #if( $CHOWN && CAN_CHOWN ) {
636 # chown $entry->uid, $entry->gid, $dir or
637 # $self->_error( qq[Could not set uid/gid on '$dir'] );
638 #}
39713df4
RGS
639 }
640
641 ### we're done if we just needed to create a dir ###
642 return 1 if $entry->is_dir;
643
644 my $full = File::Spec->catfile( $dir, $file );
645
646 if( $entry->is_unknown ) {
647 $self->_error( qq[Unknown file type for file '$full'] );
648 return;
649 }
650
651 if( length $entry->type && $entry->is_file ) {
652 my $fh = IO::File->new;
653 $fh->open( '>' . $full ) or (
654 $self->_error( qq[Could not open file '$full': $!] ),
655 return
656 );
657
658 if( $entry->size ) {
659 binmode $fh;
660 syswrite $fh, $entry->data or (
661 $self->_error( qq[Could not write data to '$full'] ),
662 return
663 );
664 }
665
666 close $fh or (
667 $self->_error( qq[Could not close file '$full'] ),
668 return
669 );
670
671 } else {
672 $self->_make_special_file( $entry, $full ) or return;
673 }
674
675 utime time, $entry->mtime - TIME_OFFSET, $full or
676 $self->_error( qq[Could not update timestamp] );
677
678 if( $CHOWN && CAN_CHOWN ) {
679 chown $entry->uid, $entry->gid, $full or
680 $self->_error( qq[Could not set uid/gid on '$full'] );
681 }
682
683 ### only chmod if we're allowed to, but never chmod symlinks, since they'll
684 ### change the perms on the file they're linking too...
685 if( $CHMOD and not -l $full ) {
686 chmod $entry->mode, $full or
687 $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
688 }
689
690 return 1;
691}
692
693sub _make_special_file {
694 my $self = shift;
695 my $entry = shift or return;
696 my $file = shift; return unless defined $file;
697
698 my $err;
699
700 if( $entry->is_symlink ) {
701 my $fail;
702 if( ON_UNIX ) {
703 symlink( $entry->linkname, $file ) or $fail++;
704
705 } else {
706 $self->_extract_special_file_as_plain_file( $entry, $file )
707 or $fail++;
708 }
709
710 $err = qq[Making symbolink link from '] . $entry->linkname .
711 qq[' to '$file' failed] if $fail;
712
713 } elsif ( $entry->is_hardlink ) {
714 my $fail;
715 if( ON_UNIX ) {
716 link( $entry->linkname, $file ) or $fail++;
717
718 } else {
719 $self->_extract_special_file_as_plain_file( $entry, $file )
720 or $fail++;
721 }
722
723 $err = qq[Making hard link from '] . $entry->linkname .
724 qq[' to '$file' failed] if $fail;
725
726 } elsif ( $entry->is_fifo ) {
727 ON_UNIX && !system('mknod', $file, 'p') or
728 $err = qq[Making fifo ']. $entry->name .qq[' failed];
729
730 } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
731 my $mode = $entry->is_blockdev ? 'b' : 'c';
732
733 ON_UNIX && !system('mknod', $file, $mode,
734 $entry->devmajor, $entry->devminor) or
735 $err = qq[Making block device ']. $entry->name .qq[' (maj=] .
736 $entry->devmajor . qq[ min=] . $entry->devminor .
737 qq[) failed.];
738
739 } elsif ( $entry->is_socket ) {
740 ### the original doesn't do anything special for sockets.... ###
741 1;
742 }
743
744 return $err ? $self->_error( $err ) : 1;
745}
746
747### don't know how to make symlinks, let's just extract the file as
748### a plain file
749sub _extract_special_file_as_plain_file {
750 my $self = shift;
751 my $entry = shift or return;
752 my $file = shift; return unless defined $file;
753
754 my $err;
755 TRY: {
756 my $orig = $self->_find_entry( $entry->linkname );
757
758 unless( $orig ) {
759 $err = qq[Could not find file '] . $entry->linkname .
760 qq[' in memory.];
761 last TRY;
762 }
763
764 ### clone the entry, make it appear as a normal file ###
765 my $clone = $entry->clone;
766 $clone->_downgrade_to_plainfile;
767 $self->_extract_file( $clone, $file ) or last TRY;
768
769 return 1;
770 }
771
772 return $self->_error($err);
773}
774
775=head2 $tar->list_files( [\@properties] )
776
777Returns a list of the names of all the files in the archive.
778
779If C<list_files()> is passed an array reference as its first argument
780it returns a list of hash references containing the requested
781properties of each file. The following list of properties is
782supported: name, size, mtime (last modified date), mode, uid, gid,
783linkname, uname, gname, devmajor, devminor, prefix.
784
785Passing an array reference containing only one element, 'name', is
786special cased to return a list of names rather than a list of hash
787references, making it equivalent to calling C<list_files> without
788arguments.
789
790=cut
791
792sub list_files {
793 my $self = shift;
794 my $aref = shift || [ ];
795
796 unless( $self->_data ) {
797 $self->read() or return;
798 }
799
800 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
801 return map { $_->full_path } @{$self->_data};
802 } else {
803
804 #my @rv;
805 #for my $obj ( @{$self->_data} ) {
806 # push @rv, { map { $_ => $obj->$_() } @$aref };
807 #}
808 #return @rv;
809
810 ### this does the same as the above.. just needs a +{ }
811 ### to make sure perl doesn't confuse it for a block
812 return map { my $o=$_;
813 +{ map { $_ => $o->$_() } @$aref }
814 } @{$self->_data};
815 }
816}
817
818sub _find_entry {
819 my $self = shift;
820 my $file = shift;
821
822 unless( defined $file ) {
823 $self->_error( qq[No file specified] );
824 return;
825 }
826
b30bcf62
RGS
827 ### it's an object already
828 return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
829
39713df4
RGS
830 for my $entry ( @{$self->_data} ) {
831 my $path = $entry->full_path;
832 return $entry if $path eq $file;
833 }
834
835 $self->_error( qq[No such file in archive: '$file'] );
836 return;
837}
838
839=head2 $tar->get_files( [@filenames] )
840
841Returns the C<Archive::Tar::File> objects matching the filenames
842provided. If no filename list was passed, all C<Archive::Tar::File>
843objects in the current Tar object are returned.
844
845Please refer to the C<Archive::Tar::File> documentation on how to
846handle these objects.
847
848=cut
849
850sub get_files {
851 my $self = shift;
852
853 return @{ $self->_data } unless @_;
854
855 my @list;
856 for my $file ( @_ ) {
857 push @list, grep { defined } $self->_find_entry( $file );
858 }
859
860 return @list;
861}
862
863=head2 $tar->get_content( $file )
864
865Return the content of the named file.
866
867=cut
868
869sub get_content {
870 my $self = shift;
871 my $entry = $self->_find_entry( shift ) or return;
872
873 return $entry->data;
874}
875
876=head2 $tar->replace_content( $file, $content )
877
878Make the string $content be the content for the file named $file.
879
880=cut
881
882sub replace_content {
883 my $self = shift;
884 my $entry = $self->_find_entry( shift ) or return;
885
886 return $entry->replace_content( shift );
887}
888
889=head2 $tar->rename( $file, $new_name )
890
891Rename the file of the in-memory archive to $new_name.
892
893Note that you must specify a Unix path for $new_name, since per tar
894standard, all files in the archive must be Unix paths.
895
896Returns true on success and false on failure.
897
898=cut
899
900sub rename {
901 my $self = shift;
902 my $file = shift; return unless defined $file;
903 my $new = shift; return unless defined $new;
904
905 my $entry = $self->_find_entry( $file ) or return;
906
907 return $entry->rename( $new );
908}
909
910=head2 $tar->remove (@filenamelist)
911
912Removes any entries with names matching any of the given filenames
913from the in-memory archive. Returns a list of C<Archive::Tar::File>
914objects that remain.
915
916=cut
917
918sub remove {
919 my $self = shift;
920 my @list = @_;
921
922 my %seen = map { $_->full_path => $_ } @{$self->_data};
923 delete $seen{ $_ } for @list;
924
925 $self->_data( [values %seen] );
926
927 return values %seen;
928}
929
930=head2 $tar->clear
931
932C<clear> clears the current in-memory archive. This effectively gives
933you a 'blank' object, ready to be filled again. Note that C<clear>
934only has effect on the object, not the underlying tarfile.
935
936=cut
937
938sub clear {
939 my $self = shift or return;
940
941 $self->_data( [] );
942 $self->_file( '' );
943
944 return 1;
945}
946
947
948=head2 $tar->write ( [$file, $compressed, $prefix] )
949
950Write the in-memory archive to disk. The first argument can either
951be the name of a file or a reference to an already open filehandle (a
952GLOB reference). If the second argument is true, the module will use
953IO::Zlib to write the file in a compressed format. If IO::Zlib is
954not available, the C<write> method will fail and return.
955
956Note that when you pass in a filehandle, the compression argument
957is ignored, as all files are printed verbatim to your filehandle.
958If you wish to enable compression with filehandles, use an
959C<IO::Zlib> filehandle instead.
960
961Specific levels of compression can be chosen by passing the values 2
962through 9 as the second parameter.
963
964The third argument is an optional prefix. All files will be tucked
965away in the directory you specify as prefix. So if you have files
966'a' and 'b' in your archive, and you specify 'foo' as prefix, they
967will be written to the archive as 'foo/a' and 'foo/b'.
968
969If no arguments are given, C<write> returns the entire formatted
970archive as a string, which could be useful if you'd like to stuff the
971archive into a socket or a pipe to gzip or something.
972
973=cut
974
975sub write {
976 my $self = shift;
977 my $file = shift; $file = '' unless defined $file;
978 my $gzip = shift || 0;
979 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
980 my $dummy = '';
981
982 ### only need a handle if we have a file to print to ###
983 my $handle = length($file)
984 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
985 or return )
986 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h }
987 : $HAS_IO_STRING ? IO::String->new
988 : __PACKAGE__->no_string_support();
989
990
991
992 for my $entry ( @{$self->_data} ) {
993 ### entries to be written to the tarfile ###
994 my @write_me;
995
996 ### only now will we change the object to reflect the current state
997 ### of the name and prefix fields -- this needs to be limited to
998 ### write() only!
999 my $clone = $entry->clone;
1000
1001
1002 ### so, if you don't want use to use the prefix, we'll stuff
1003 ### everything in the name field instead
1004 if( $DO_NOT_USE_PREFIX ) {
1005
1006 ### you might have an extended prefix, if so, set it in the clone
1007 ### XXX is ::Unix right?
1008 $clone->name( length $ext_prefix
1009 ? File::Spec::Unix->catdir( $ext_prefix,
1010 $clone->full_path)
1011 : $clone->full_path );
1012 $clone->prefix( '' );
1013
1014 ### otherwise, we'll have to set it properly -- prefix part in the
1015 ### prefix and name part in the name field.
1016 } else {
1017
1018 ### split them here, not before!
1019 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
1020
1021 ### you might have an extended prefix, if so, set it in the clone
1022 ### XXX is ::Unix right?
1023 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
1024 if length $ext_prefix;
1025
1026 $clone->prefix( $prefix );
1027 $clone->name( $name );
1028 }
1029
1030 ### names are too long, and will get truncated if we don't add a
1031 ### '@LongLink' file...
1032 my $make_longlink = ( length($clone->name) > NAME_LENGTH or
1033 length($clone->prefix) > PREFIX_LENGTH
1034 ) || 0;
1035
1036 ### perhaps we need to make a longlink file?
1037 if( $make_longlink ) {
1038 my $longlink = Archive::Tar::File->new(
1039 data => LONGLINK_NAME,
1040 $clone->full_path,
1041 { type => LONGLINK }
1042 );
1043
1044 unless( $longlink ) {
1045 $self->_error( qq[Could not create 'LongLink' entry for ] .
1046 qq[oversize file '] . $clone->full_path ."'" );
1047 return;
1048 };
1049
1050 push @write_me, $longlink;
1051 }
1052
1053 push @write_me, $clone;
1054
1055 ### write the one, optionally 2 a::t::file objects to the handle
1056 for my $clone (@write_me) {
1057
1058 ### if the file is a symlink, there are 2 options:
1059 ### either we leave the symlink intact, but then we don't write any
1060 ### data OR we follow the symlink, which means we actually make a
1061 ### copy. if we do the latter, we have to change the TYPE of the
1062 ### clone to 'FILE'
1063 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1064 my $data_ok = !$clone->is_symlink && $clone->has_content;
1065
1066 ### downgrade to a 'normal' file if it's a symlink we're going to
1067 ### treat as a regular file
1068 $clone->_downgrade_to_plainfile if $link_ok;
1069
1070 ### get the header for this block
1071 my $header = $self->_format_tar_entry( $clone );
1072 unless( $header ) {
1073 $self->_error(q[Could not format header for: ] .
1074 $clone->full_path );
1075 return;
1076 }
1077
1078 unless( print $handle $header ) {
1079 $self->_error(q[Could not write header for: ] .
1080 $clone->full_path);
1081 return;
1082 }
1083
1084 if( $link_ok or $data_ok ) {
1085 unless( print $handle $clone->data ) {
1086 $self->_error(q[Could not write data for: ] .
1087 $clone->full_path);
1088 return;
1089 }
1090
1091 ### pad the end of the clone if required ###
1092 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1093 }
1094
1095 } ### done writing these entries
1096 }
1097
1098 ### write the end markers ###
1099 print $handle TAR_END x 2 or
1100 return $self->_error( qq[Could not write tar end markers] );
b30bcf62 1101
39713df4 1102 ### did you want it written to a file, or returned as a string? ###
b30bcf62 1103 my $rv = length($file) ? 1
39713df4 1104 : $HAS_PERLIO ? $dummy
b30bcf62
RGS
1105 : do { seek $handle, 0, 0; local $/; <$handle> };
1106
1107 ### make sure to close the handle;
1108 close $handle;
1109
1110 return $rv;
39713df4
RGS
1111}
1112
1113sub _format_tar_entry {
1114 my $self = shift;
1115 my $entry = shift or return;
1116 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
1117 my $no_prefix = shift || 0;
1118
1119 my $file = $entry->name;
1120 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix;
1121
1122 ### remove the prefix from the file name
1123 ### not sure if this is still neeeded --kane
1124 ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1125 ### this for us. Even worse, this would break if we tried to add a file
1126 ### like x/x.
1127 #if( length $prefix ) {
1128 # $file =~ s/^$match//;
1129 #}
1130
1131 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1132 if length $ext_prefix;
1133
1134 ### not sure why this is... ###
1135 my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1136 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1137
1138 my $f1 = "%06o"; my $f2 = "%11o";
1139
1140 ### this might be optimizable with a 'changed' flag in the file objects ###
1141 my $tar = pack (
1142 PACK,
1143 $file,
1144
1145 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1146 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1147
1148 "", # checksum field - space padded a bit down
1149
1150 (map { $entry->$_() } qw[type linkname magic]),
1151
1152 $entry->version || TAR_VERSION,
1153
1154 (map { $entry->$_() } qw[uname gname]),
1155 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1156
1157 ($no_prefix ? '' : $prefix)
1158 );
1159
1160 ### add the checksum ###
1161 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1162
1163 return $tar;
1164}
1165
1166=head2 $tar->add_files( @filenamelist )
1167
1168Takes a list of filenames and adds them to the in-memory archive.
1169
1170The path to the file is automatically converted to a Unix like
1171equivalent for use in the archive, and, if on MacOS, the file's
1172modification time is converted from the MacOS epoch to the Unix epoch.
1173So tar archives created on MacOS with B<Archive::Tar> can be read
1174both with I<tar> on Unix and applications like I<suntar> or
1175I<Stuffit Expander> on MacOS.
1176
1177Be aware that the file's type/creator and resource fork will be lost,
1178which is usually what you want in cross-platform archives.
1179
1180Returns a list of C<Archive::Tar::File> objects that were just added.
1181
1182=cut
1183
1184sub add_files {
1185 my $self = shift;
1186 my @files = @_ or return;
1187
1188 my @rv;
1189 for my $file ( @files ) {
c3745331 1190 unless( -e $file || -l $file ) {
39713df4
RGS
1191 $self->_error( qq[No such file: '$file'] );
1192 next;
1193 }
1194
1195 my $obj = Archive::Tar::File->new( file => $file );
1196 unless( $obj ) {
1197 $self->_error( qq[Unable to add file: '$file'] );
1198 next;
1199 }
1200
1201 push @rv, $obj;
1202 }
1203
1204 push @{$self->{_data}}, @rv;
1205
1206 return @rv;
1207}
1208
1209=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1210
1211Takes a filename, a scalar full of data and optionally a reference to
1212a hash with specific options.
1213
1214Will add a file to the in-memory archive, with name C<$filename> and
1215content C<$data>. Specific properties can be set using C<$opthashref>.
1216The following list of properties is supported: name, size, mtime
1217(last modified date), mode, uid, gid, linkname, uname, gname,
b3200c5d 1218devmajor, devminor, prefix, type. (On MacOS, the file's path and
39713df4
RGS
1219modification times are converted to Unix equivalents.)
1220
b3200c5d
SP
1221Valid values for the file type are the following constants defined in
1222Archive::Tar::Constants:
1223
1224=over 4
1225
1226=item FILE
1227
1228Regular file.
1229
1230=item HARDLINK
1231
1232=item SYMLINK
1233
1234Hard and symbolic ("soft") links; linkname should specify target.
1235
1236=item CHARDEV
1237
1238=item BLOCKDEV
1239
1240Character and block devices. devmajor and devminor should specify the major
1241and minor device numbers.
1242
1243=item DIR
1244
1245Directory.
1246
1247=item FIFO
1248
1249FIFO (named pipe).
1250
1251=item SOCKET
1252
1253Socket.
1254
1255=back
1256
39713df4
RGS
1257Returns the C<Archive::Tar::File> object that was just added, or
1258C<undef> on failure.
1259
1260=cut
1261
1262sub add_data {
1263 my $self = shift;
1264 my ($file, $data, $opt) = @_;
1265
1266 my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1267 unless( $obj ) {
1268 $self->_error( qq[Unable to add file: '$file'] );
1269 return;
1270 }
1271
1272 push @{$self->{_data}}, $obj;
1273
1274 return $obj;
1275}
1276
1277=head2 $tar->error( [$BOOL] )
1278
1279Returns the current errorstring (usually, the last error reported).
1280If a true value was specified, it will give the C<Carp::longmess>
1281equivalent of the error, in effect giving you a stacktrace.
1282
1283For backwards compatibility, this error is also available as
1284C<$Archive::Tar::error> although it is much recommended you use the
1285method call instead.
1286
1287=cut
1288
1289{
1290 $error = '';
1291 my $longmess;
1292
1293 sub _error {
1294 my $self = shift;
1295 my $msg = $error = shift;
1296 $longmess = Carp::longmess($error);
1297
1298 ### set Archive::Tar::WARN to 0 to disable printing
1299 ### of errors
1300 if( $WARN ) {
1301 carp $DEBUG ? $longmess : $msg;
1302 }
1303
1304 return;
1305 }
1306
1307 sub error {
1308 my $self = shift;
1309 return shift() ? $longmess : $error;
1310 }
1311}
1312
f38c1908
SP
1313=head2 $tar->setcwd( $cwd );
1314
1315C<Archive::Tar> needs to know the current directory, and it will run
1316C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
1317tarfile and saves it in the file system. (As of version 1.30, however,
1318C<Archive::Tar> will use the speed optimization described below
1319automatically, so it's only relevant if you're using C<extract_file()>).
1320
1321Since C<Archive::Tar> doesn't change the current directory internally
1322while it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1323can be avoided if we can guarantee that the current directory doesn't
1324get changed externally.
1325
1326To use this performance boost, set the current directory via
1327
1328 use Cwd;
1329 $tar->setcwd( cwd() );
1330
1331once before calling a function like C<extract_file> and
1332C<Archive::Tar> will use the current directory setting from then on
1333and won't call C<Cwd::cwd()> internally.
1334
1335To switch back to the default behaviour, use
1336
1337 $tar->setcwd( undef );
1338
1339and C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1340
1341If you're using C<Archive::Tar>'s C<exract()> method, C<setcwd()> will
1342be called for you.
1343
1344=cut
1345
1346sub setcwd {
1347 my $self = shift;
1348 my $cwd = shift;
1349
1350 $self->{cwd} = $cwd;
1351}
39713df4
RGS
1352
1353=head2 $bool = $tar->has_io_string
1354
1355Returns true if we currently have C<IO::String> support loaded.
1356
1357Either C<IO::String> or C<perlio> support is needed to support writing
3c4b39be 1358stringified archives. Currently, C<perlio> is the preferred method, if
39713df4
RGS
1359available.
1360
1361See the C<GLOBAL VARIABLES> section to see how to change this preference.
1362
1363=cut
1364
1365sub has_io_string { return $HAS_IO_STRING; }
1366
1367=head2 $bool = $tar->has_perlio
1368
1369Returns true if we currently have C<perlio> support loaded.
1370
1371This requires C<perl-5.8> or higher, compiled with C<perlio>
1372
1373Either C<IO::String> or C<perlio> support is needed to support writing
3c4b39be 1374stringified archives. Currently, C<perlio> is the preferred method, if
39713df4
RGS
1375available.
1376
1377See the C<GLOBAL VARIABLES> section to see how to change this preference.
1378
1379=cut
1380
1381sub has_perlio { return $HAS_PERLIO; }
1382
1383
1384=head1 Class Methods
1385
1386=head2 Archive::Tar->create_archive($file, $compression, @filelist)
1387
1388Creates a tar file from the list of files provided. The first
1389argument can either be the name of the tar file to create or a
1390reference to an open file handle (e.g. a GLOB reference).
1391
1392The second argument specifies the level of compression to be used, if
1393any. Compression of tar files requires the installation of the
1394IO::Zlib module. Specific levels of compression may be
1395requested by passing a value between 2 and 9 as the second argument.
1396Any other value evaluating as true will result in the default
1397compression level being used.
1398
1399Note that when you pass in a filehandle, the compression argument
1400is ignored, as all files are printed verbatim to your filehandle.
1401If you wish to enable compression with filehandles, use an
1402C<IO::Zlib> filehandle instead.
1403
1404The remaining arguments list the files to be included in the tar file.
1405These files must all exist. Any files which don't exist or can't be
1406read are silently ignored.
1407
1408If the archive creation fails for any reason, C<create_archive> will
1409return false. Please use the C<error> method to find the cause of the
1410failure.
1411
1412Note that this method does not write C<on the fly> as it were; it
1413still reads all the files into memory before writing out the archive.
1414Consult the FAQ below if this is a problem.
1415
1416=cut
1417
1418sub create_archive {
1419 my $class = shift;
1420
1421 my $file = shift; return unless defined $file;
1422 my $gzip = shift || 0;
1423 my @files = @_;
1424
1425 unless( @files ) {
1426 return $class->_error( qq[Cowardly refusing to create empty archive!] );
1427 }
1428
1429 my $tar = $class->new;
1430 $tar->add_files( @files );
1431 return $tar->write( $file, $gzip );
1432}
1433
1434=head2 Archive::Tar->list_archive ($file, $compressed, [\@properties])
1435
1436Returns a list of the names of all the files in the archive. The
1437first argument can either be the name of the tar file to list or a
1438reference to an open file handle (e.g. a GLOB reference).
1439
1440If C<list_archive()> is passed an array reference as its third
1441argument it returns a list of hash references containing the requested
1442properties of each file. The following list of properties is
b3200c5d
SP
1443supported: full_path, name, size, mtime (last modified date), mode,
1444uid, gid, linkname, uname, gname, devmajor, devminor, prefix.
1445
1446See C<Archive::Tar::File> for details about supported properties.
39713df4
RGS
1447
1448Passing an array reference containing only one element, 'name', is
1449special cased to return a list of names rather than a list of hash
1450references.
1451
1452=cut
1453
1454sub list_archive {
1455 my $class = shift;
1456 my $file = shift; return unless defined $file;
1457 my $gzip = shift || 0;
1458
1459 my $tar = $class->new($file, $gzip);
1460 return unless $tar;
1461
1462 return $tar->list_files( @_ );
1463}
1464
1465=head2 Archive::Tar->extract_archive ($file, $gzip)
1466
1467Extracts the contents of the tar file. The first argument can either
1468be the name of the tar file to create or a reference to an open file
1469handle (e.g. a GLOB reference). All relative paths in the tar file will
1470be created underneath the current working directory.
1471
1472C<extract_archive> will return a list of files it extracted.
1473If the archive extraction fails for any reason, C<extract_archive>
1474will return false. Please use the C<error> method to find the cause
1475of the failure.
1476
1477=cut
1478
1479sub extract_archive {
1480 my $class = shift;
1481 my $file = shift; return unless defined $file;
1482 my $gzip = shift || 0;
1483
1484 my $tar = $class->new( ) or return;
1485
1486 return $tar->read( $file, $gzip, { extract => 1 } );
1487}
1488
1489=head2 Archive::Tar->can_handle_compressed_files
1490
1491A simple checking routine, which will return true if C<Archive::Tar>
1492is able to uncompress compressed archives on the fly with C<IO::Zlib>,
1493or false if C<IO::Zlib> is not installed.
1494
1495You can use this as a shortcut to determine whether C<Archive::Tar>
1496will do what you think before passing compressed archives to its
1497C<read> method.
1498
1499=cut
1500
1501sub can_handle_compressed_files { return ZLIB ? 1 : 0 }
1502
1503sub no_string_support {
1504 croak("You have to install IO::String to support writing archives to strings");
1505}
1506
15071;
1508
1509__END__
1510
1511=head1 GLOBAL VARIABLES
1512
1513=head2 $Archive::Tar::FOLLOW_SYMLINK
1514
1515Set this variable to C<1> to make C<Archive::Tar> effectively make a
1516copy of the file when extracting. Default is C<0>, which
1517means the symlink stays intact. Of course, you will have to pack the
1518file linked to as well.
1519
1520This option is checked when you write out the tarfile using C<write>
1521or C<create_archive>.
1522
1523This works just like C</bin/tar>'s C<-h> option.
1524
1525=head2 $Archive::Tar::CHOWN
1526
1527By default, C<Archive::Tar> will try to C<chown> your files if it is
1528able to. In some cases, this may not be desired. In that case, set
1529this variable to C<0> to disable C<chown>-ing, even if it were
1530possible.
1531
1532The default is C<1>.
1533
1534=head2 $Archive::Tar::CHMOD
1535
1536By default, C<Archive::Tar> will try to C<chmod> your files to
1537whatever mode was specified for the particular file in the archive.
1538In some cases, this may not be desired. In that case, set this
1539variable to C<0> to disable C<chmod>-ing.
1540
1541The default is C<1>.
1542
1543=head2 $Archive::Tar::DO_NOT_USE_PREFIX
1544
f38c1908
SP
1545By default, C<Archive::Tar> will try to put paths that are over
1546100 characters in the C<prefix> field of your tar header, as
1547defined per POSIX-standard. However, some (older) tar programs
1548do not implement this spec. To retain compatibility with these older
1549or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
1550variable to a true value, and C<Archive::Tar> will use an alternate
1551way of dealing with paths over 100 characters by using the
1552C<GNU Extended Header> feature.
1553
1554Note that clients who do not support the C<GNU Extended Header>
1555feature will not be able to read these archives. Such clients include
1556tars on C<Solaris>, C<Irix> and C<AIX>.
39713df4
RGS
1557
1558The default is C<0>.
1559
1560=head2 $Archive::Tar::DEBUG
1561
1562Set this variable to C<1> to always get the C<Carp::longmess> output
1563of the warnings, instead of the regular C<carp>. This is the same
1564message you would get by doing:
1565
1566 $tar->error(1);
1567
1568Defaults to C<0>.
1569
1570=head2 $Archive::Tar::WARN
1571
1572Set this variable to C<0> if you do not want any warnings printed.
1573Personally I recommend against doing this, but people asked for the
1574option. Also, be advised that this is of course not threadsafe.
1575
1576Defaults to C<1>.
1577
1578=head2 $Archive::Tar::error
1579
1580Holds the last reported error. Kept for historical reasons, but its
1581use is very much discouraged. Use the C<error()> method instead:
1582
1583 warn $tar->error unless $tar->extract;
1584
178aef9a
RGS
1585=head2 $Archive::Tar::INSECURE_EXTRACT_MODE
1586
1587This variable indicates whether C<Archive::Tar> should allow
1588files to be extracted outside their current working directory.
1589
1590Allowing this could have security implications, as a malicious
1591tar archive could alter or replace any file the extracting user
1592has permissions to. Therefor, the default is to not allow
1593insecure extractions.
1594
1595If you trust the archive, or have other reasons to allow the
1596archive to write files outside your current working directory,
1597set this variable to C<true>.
1598
1599Note that this is a backwards incompatible change from version
1600C<1.36> and before.
1601
39713df4
RGS
1602=head2 $Archive::Tar::HAS_PERLIO
1603
1604This variable holds a boolean indicating if we currently have
1605C<perlio> support loaded. This will be enabled for any perl
1606greater than C<5.8> compiled with C<perlio>.
1607
1608If you feel strongly about disabling it, set this variable to
1609C<false>. Note that you will then need C<IO::String> installed
1610to support writing stringified archives.
1611
1612Don't change this variable unless you B<really> know what you're
1613doing.
1614
1615=head2 $Archive::Tar::HAS_IO_STRING
1616
1617This variable holds a boolean indicating if we currently have
1618C<IO::String> support loaded. This will be enabled for any perl
1619that has a loadable C<IO::String> module.
1620
1621If you feel strongly about disabling it, set this variable to
1622C<false>. Note that you will then need C<perlio> support from
1623your perl to be able to write stringified archives.
1624
1625Don't change this variable unless you B<really> know what you're
1626doing.
1627
1628=head1 FAQ
1629
1630=over 4
1631
1632=item What's the minimum perl version required to run Archive::Tar?
1633
1634You will need perl version 5.005_03 or newer.
1635
1636=item Isn't Archive::Tar slow?
1637
1638Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
1639However, it's very portable. If speed is an issue, consider using
1640C</bin/tar> instead.
1641
1642=item Isn't Archive::Tar heavier on memory than /bin/tar?
1643
1644Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
1645C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
1646choice but to read the archive into memory.
1647This is ok if you want to do in-memory manipulation of the archive.
1648If you just want to extract, use the C<extract_archive> class method
1649instead. It will optimize and write to disk immediately.
1650
1651=item Can't you lazy-load data instead?
1652
1653No, not easily. See previous question.
1654
1655=item How much memory will an X kb tar file need?
1656
1657Probably more than X kb, since it will all be read into memory. If
1658this is a problem, and you don't need to do in memory manipulation
1659of the archive, consider using C</bin/tar> instead.
1660
1661=item What do you do with unsupported filetypes in an archive?
1662
1663C<Unix> has a few filetypes that aren't supported on other platforms,
1664like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
1665try to make a copy of the original file, rather than throwing an error.
1666
1667This does require you to read the entire archive in to memory first,
1668since otherwise we wouldn't know what data to fill the copy with.
1669(This means that you cannot use the class methods on archives that
1670have incompatible filetypes and still expect things to work).
1671
1672For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
1673the extraction of this particular item didn't work.
1674
f38c1908
SP
1675=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
1676
1677By default, C<Archive::Tar> is in a completely POSIX-compatible
1678mode, which uses the POSIX-specification of C<tar> to store files.
1679For paths greather than 100 characters, this is done using the
1680C<POSIX header prefix>. Non-POSIX-compatible clients may not support
1681this part of the specification, and may only support the C<GNU Extended
1682Header> functionality. To facilitate those clients, you can set the
1683C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
1684C<GLOBAL VARIABLES> section for details on this variable.
1685
c3745331
RGS
1686Note that GNU tar earlier than version 1.14 does not cope well with
1687the C<POSIX header prefix>. If you use such a version, consider setting
1688the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
1689
b30bcf62
RGS
1690=item How do I extract only files that have property X from an archive?
1691
1692Sometimes, you might not wish to extract a complete archive, just
1693the files that are relevant to you, based on some criteria.
1694
1695You can do this by filtering a list of C<Archive::Tar::File> objects
1696based on your criteria. For example, to extract only files that have
1697the string C<foo> in their title, you would use:
1698
1699 $tar->extract(
1700 grep { $_->full_path =~ /foo/ } $tar->get_files
1701 );
1702
1703This way, you can filter on any attribute of the files in the archive.
1704Consult the C<Archive::Tar::File> documentation on how to use these
1705objects.
1706
81a5970e
RGS
1707=item How do I access .tar.Z files?
1708
1709The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
1710the C<IO::Zlib> module) to access tar files that have been compressed
1711with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
1712utility cannot be read by C<Compress::Zlib> and so cannot be directly
1713accesses by C<Archive::Tar>.
1714
1715If the C<uncompress> or C<gunzip> programs are available, you can use
1716one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
1717
1718Firstly with C<uncompress>
1719
1720 use Archive::Tar;
1721
1722 open F, "uncompress -c $filename |";
1723 my $tar = Archive::Tar->new(*F);
1724 ...
1725
1726and this with C<gunzip>
1727
1728 use Archive::Tar;
1729
1730 open F, "gunzip -c $filename |";
1731 my $tar = Archive::Tar->new(*F);
1732 ...
1733
1734Similarly, if the C<compress> program is available, you can use this to
1735write a C<.tar.Z> file
1736
1737 use Archive::Tar;
1738 use IO::File;
1739
1740 my $fh = new IO::File "| compress -c >$filename";
1741 my $tar = Archive::Tar->new();
1742 ...
1743 $tar->write($fh);
1744 $fh->close ;
1745
01d11a1c
SP
1746=item How do I handle Unicode strings?
1747
1748C<Archive::Tar> uses byte semantics for any files it reads from or writes
1749to disk. This is not a problem if you only deal with files and never
1750look at their content or work solely with byte strings. But if you use
1751Unicode strings with character semantics, some additional steps need
1752to be taken.
1753
1754For example, if you add a Unicode string like
1755
1756 # Problem
1757 $tar->add_data('file.txt', "Euro: \x{20AC}");
1758
1759then there will be a problem later when the tarfile gets written out
1760to disk via C<$tar->write()>:
1761
1762 Wide character in print at .../Archive/Tar.pm line 1014.
1763
1764The data was added as a Unicode string and when writing it out to disk,
1765the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
1766tried to convert the string to ISO-8859 and failed. The written file
1767now contains garbage.
1768
1769For this reason, Unicode strings need to be converted to UTF-8-encoded
1770bytestrings before they are handed off to C<add_data()>:
1771
1772 use Encode;
1773 my $data = "Accented character: \x{20AC}";
1774 $data = encode('utf8', $data);
1775
1776 $tar->add_data('file.txt', $data);
1777
1778A opposite problem occurs if you extract a UTF8-encoded file from a
1779tarball. Using C<get_content()> on the C<Archive::Tar::File> object
1780will return its content as a bytestring, not as a Unicode string.
1781
1782If you want it to be a Unicode string (because you want character
1783semantics with operations like regular expression matching), you need
1784to decode the UTF8-encoded content and have Perl convert it into
1785a Unicode string:
1786
1787 use Encode;
1788 my $data = $tar->get_content();
1789
1790 # Make it a Unicode string
1791 $data = decode('utf8', $data);
1792
1793There is no easy way to provide this functionality in C<Archive::Tar>,
1794because a tarball can contain many files, and each of which could be
1795encoded in a different way.
81a5970e 1796
39713df4
RGS
1797=back
1798
1799=head1 TODO
1800
1801=over 4
1802
1803=item Check if passed in handles are open for read/write
1804
1805Currently I don't know of any portable pure perl way to do this.
1806Suggestions welcome.
1807
b3200c5d
SP
1808=item Allow archives to be passed in as string
1809
1810Currently, we only allow opened filehandles or filenames, but
1811not strings. The internals would need some reworking to facilitate
1812stringified archives.
1813
1814=item Facilitate processing an opened filehandle of a compressed archive
1815
1816Currently, we only support this if the filehandle is an IO::Zlib object.
1817Environments, like apache, will present you with an opened filehandle
1818to an uploaded file, which might be a compressed archive.
1819
39713df4
RGS
1820=back
1821
f38c1908
SP
1822=head1 SEE ALSO
1823
1824=over 4
1825
1826=item The GNU tar specification
1827
1828C<http://www.gnu.org/software/tar/manual/tar.html>
1829
1830=item The PAX format specication
1831
1832The specifcation which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
1833
1834=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>
1835
1836=item GNU tar intends to switch to POSIX compatibility
1837
1838GNU Tar authors have expressed their intention to become completely
1839POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html>
1840
1841=item A Comparison between various tar implementations
1842
1843Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs>
1844
1845=back
1846
39713df4
RGS
1847=head1 AUTHOR
1848
c3745331
RGS
1849This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
1850
1851Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
39713df4
RGS
1852
1853=head1 ACKNOWLEDGEMENTS
1854
1855Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney and
1856especially Andrew Savige for their help and suggestions.
1857
1858=head1 COPYRIGHT
1859
c3745331
RGS
1860This module is copyright (c) 2002 - 2007 Jos Boumans
1861E<lt>kane@cpan.orgE<gt>. All rights reserved.
39713df4 1862
c3745331
RGS
1863This library is free software; you may redistribute and/or modify
1864it under the same terms as Perl itself.
39713df4
RGS
1865
1866=cut