use strict;
use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
$DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS
- $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT
+ $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK
];
@ISA = qw[Exporter];
$DEBUG = 0;
$WARN = 1;
$FOLLOW_SYMLINK = 0;
-$VERSION = "1.62";
+$VERSION = "2.14";
$CHOWN = 1;
$CHMOD = 1;
$SAME_PERMISSIONS = $> == 0 ? 1 : 0;
$DO_NOT_USE_PREFIX = 0;
$INSECURE_EXTRACT_MODE = 0;
$ZERO_PAD_NUMBERS = 0;
+$RESOLVE_SYMLINK = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed';
BEGIN {
use Config;
$tar->add_data('file/baz.txt', 'This is the contents now');
$tar->rename('oldname', 'new/file/name');
+ $tar->chown('/', 'root');
+ $tar->chown('/', 'root:root');
+ $tar->chmod('/tmp', '1777');
$tar->write('files.tar'); # plain tar
$tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed
Can be set to a regular expression. Only files with names that match
the expression will be read.
+=item md5
+
+Set to 1 and the md5sum of files will be returned (instead of file data)
+ my $iter = Archive::Tar->iter( $file, 1, {md5 => 1} );
+ while( my $f = $iter->() ) {
+ print $f->data . "\t" . $f->full_path . $/;
+ }
+
=item extract
If set to true, immediately extract entries when reading them. This
sub _get_handle {
my $self = shift;
my $file = shift; return unless defined $file;
- return $file if ref $file;
my $compress = shift || 0;
my $mode = shift || READ_ONLY->( ZLIB ); # default to read only
+ ### Check if file is a file handle or IO glob
+ if ( ref $file ) {
+ return $file if eval{ *$file{IO} };
+ return $file if eval{ $file->isa(q{IO::Handle}) };
+ $file = q{}.$file;
+ }
### get a FH opened to the right class, so we can use it transparently
### throughout the program
my $count = $opts->{limit} || 0;
my $filter = $opts->{filter};
+ my $md5 = $opts->{md5} || 0; # cdrake
+ my $filter_cb = $opts->{filter_cb};
my $extract = $opts->{extract} || 0;
### set a cap on the amount of files to extract ###
LOOP:
while( $handle->read( $chunk, HEAD ) ) {
### IO::Zlib doesn't support this yet
- my $offset = eval { tell $handle } || 'unknown';
- $@ = '';
+ my $offset;
+ if ( ref($handle) ne 'IO::Zlib' ) {
+ local $@;
+ $offset = eval { tell $handle } || 'unknown';
+ $@ = '';
+ }
+ else {
+ $offset = 'unknown';
+ }
unless( $read++ ) {
my $gzip = GZIP_MAGIC_NUM;
$self->_error( qq[Cannot read compressed format in tar-mode] );
return;
}
-
+
### size is < HEAD, which means a corrupted file, as the minimum
### length is _at least_ HEAD
if (length $chunk != HEAD) {
### according to the posix spec, the last 12 bytes of the header are
### null bytes, to pad it to a 512 byte block. That means if these
- ### bytes are NOT null bytes, it's a corrrupt header. See:
+ ### bytes are NOT null bytes, it's a corrupt header. See:
### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
### line 111
{ my $nulls = join '', "\0" x 12;
$data = $entry->get_content_by_ref;
- ### just read everything into memory
- ### can't do lazy loading since IO::Zlib doesn't support 'seek'
- ### this is because Compress::Zlib doesn't support it =/
- ### this reads in the whole data in one read() call.
- if( $handle->read( $$data, $block ) < $block ) {
- $self->_error( qq[Read error on tarfile (missing data) '].
+ my $skip = 0;
+ my $ctx; # cdrake
+ ### skip this entry if we're filtering
+
+ if($md5) { # cdrake
+ $ctx = Digest::MD5->new; # cdrake
+ $skip=5; # cdrake
+
+ } elsif ($filter && $entry->name !~ $filter) {
+ $skip = 1;
+
+ } elsif ($filter_cb && ! $filter_cb->($entry)) {
+ $skip = 2;
+
+ ### skip this entry if it's a pax header. This is a special file added
+ ### by, among others, git-generated tarballs. It holds comments and is
+ ### not meant for extracting. See #38932: pax_global_header extracted
+ } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
+ $skip = 3;
+ }
+
+ if ($skip) {
+ #
+ # Since we're skipping, do not allocate memory for the
+ # whole file. Read it 64 BLOCKS at a time. Do not
+ # complete the skip yet because maybe what we read is a
+ # longlink and it won't get skipped after all
+ #
+ my $amt = $block;
+ my $fsz=$entry->size; # cdrake
+ while ($amt > 0) {
+ $$data = '';
+ my $this = 64 * BLOCK;
+ $this = $amt if $this > $amt;
+ if( $handle->read( $$data, $this ) < $this ) {
+ $self->_error( qq[Read error on tarfile (missing data) '].
+ $entry->full_path ."' at offset $offset" );
+ next LOOP;
+ }
+ $amt -= $this;
+ $fsz -= $this; # cdrake
+ substr ($$data, $fsz) = "" if ($fsz<0); # remove external junk prior to md5 # cdrake
+ $ctx->add($$data) if($skip==5); # cdrake
+ }
+ $$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ; # cdrake
+ } else {
+
+ ### just read everything into memory
+ ### can't do lazy loading since IO::Zlib doesn't support 'seek'
+ ### this is because Compress::Zlib doesn't support it =/
+ ### this reads in the whole data in one read() call.
+ if ( $handle->read( $$data, $block ) < $block ) {
+ $self->_error( qq[Read error on tarfile (missing data) '].
$entry->full_path ."' at offset $offset" );
- next LOOP;
+ next LOOP;
+ }
+ ### throw away trailing garbage ###
+ substr ($$data, $entry->size) = "" if defined $$data;
}
- ### throw away trailing garbage ###
- substr ($$data, $entry->size) = "" if defined $$data;
-
### part II of the @LongLink munging -- need to do /after/
### the checksum check.
if( $entry->is_longlink ) {
### but that doesn't *always* happen.. so check if the last
### character is a control character, and if so remove it
### at any rate, we better remove that character here, or tests
- ### like 'eq' and hashlook ups based on names will SO not work
+ ### like 'eq' and hash lookups based on names will SO not work
### remove it by calculating the proper size, and then
### tossing out everything that's longer than that size.
undef $real_name;
}
- ### skip this entry if we're filtering
- if ($filter && $entry->name !~ $filter) {
- next LOOP;
+ if ($filter && $entry->name !~ $filter) {
+ next LOOP;
- ### skip this entry if it's a pax header. This is a special file added
- ### by, among others, git-generated tarballs. It holds comments and is
- ### not meant for extracting. See #38932: pax_global_header extracted
- } elsif ( $entry->name eq PAX_HEADER ) {
- next LOOP;
- }
+ } elsif ($filter_cb && ! $filter_cb->($entry)) {
+ next LOOP;
+
+ ### skip this entry if it's a pax header. This is a special file added
+ ### by, among others, git-generated tarballs. It holds comments and is
+ ### not meant for extracting. See #38932: pax_global_header extracted
+ } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
+ next LOOP;
+ }
if ( $extract && !$entry->is_longlink
&& !$entry->is_unknown
# use the speed optimization for all extracted files
local($self->{cwd}) = cwd() unless $self->{cwd};
- ### you requested the extraction of only certian files
+ ### you requested the extraction of only certain files
if( @args ) {
for my $file ( @args ) {
my @cwd = File::Spec->splitdir( $cwd_dir );
push @cwd, $cwd_file if length $cwd_file;
- ### We need to pass '' as the last elemant to catpath. Craig Berry
+ ### We need to pass '' as the last element to catpath. Craig Berry
### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
### The root problem is that splitpath on UNIX always returns the
### final path element as a file even if it is a directory, and of
$self->_error( qq[Could not update timestamp] );
}
- if( $CHOWN && CAN_CHOWN->() ) {
+ if( $CHOWN && CAN_CHOWN->() and not -l $full ) {
chown $entry->uid, $entry->gid, $full or
$self->_error( qq[Could not set uid/gid on '$full'] );
}
my $err;
TRY: {
- my $orig = $self->_find_entry( $entry->linkname );
+ my $orig = $self->_find_entry( $entry->linkname, $entry );
unless( $orig ) {
$err = qq[Could not find file '] . $entry->linkname .
}
### clone the entry, make it appear as a normal file ###
- my $clone = $entry->clone;
+ my $clone = $orig->clone;
$clone->_downgrade_to_plainfile;
$self->_extract_file( $clone, $file ) or last TRY;
### it's an object already
return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
- for my $entry ( @{$self->_data} ) {
- my $path = $entry->full_path;
- return $entry if $path eq $file;
- }
+seach_entry:
+ if($self->_data){
+ for my $entry ( @{$self->_data} ) {
+ my $path = $entry->full_path;
+ return $entry if $path eq $file;
+ }
+ }
+
+ if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
+ if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin )
+ $file = _symlinks_resolver( $link_entry->name, $file );
+ goto seach_entry if $self->_data;
+
+ #this will be slower than never, but won't failed!
+
+ my $iterargs = $link_entry->{'_archive'};
+ if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){
+ #faster but whole archive will be read in memory
+ #read whole archive and share data
+ my $archive = Archive::Tar->new;
+ $archive->read( @$iterargs );
+ push @$iterargs, $archive; #take a trace for destruction
+ if($archive->_data){
+ $self->_data( $archive->_data );
+ goto seach_entry;
+ }
+ }#faster
+
+ {#slower but lower memory usage
+ # $iterargs = [$filename, $compressed, $opts];
+ my $next = Archive::Tar->iter( @$iterargs );
+ while(my $e = $next->()){
+ if($e->full_path eq $file){
+ undef $next;
+ return $e;
+ }
+ }
+ }#slower
+ }
+ }
$self->_error( qq[No such file in archive: '$file'] );
return;
return $entry->rename( $new );
}
+=head2 $tar->chmod( $file, $mode )
+
+Change mode of $file to $mode.
+
+Returns true on success and false on failure.
+
+=cut
+
+sub chmod {
+ my $self = shift;
+ my $file = shift; return unless defined $file;
+ my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
+ my @args = ("$mode");
+
+ my $entry = $self->_find_entry( $file ) or return;
+ my $x = $entry->chmod( @args );
+ return $x;
+}
+
+=head2 $tar->chown( $file, $uname [, $gname] )
+
+Change owner $file to $uname and $gname.
+
+Returns true on success and false on failure.
+
+=cut
+
+sub chown {
+ my $self = shift;
+ my $file = shift; return unless defined $file;
+ my $uname = shift; return unless defined $uname;
+ my @args = ($uname);
+ push(@args, shift);
+
+ my $entry = $self->_find_entry( $file ) or return;
+ my $x = $entry->chown( @args );
+ return $x;
+}
+
=head2 $tar->remove (@filenamelist)
Removes any entries with names matching any of the given filenames
The second argument is used to indicate compression. You can either
compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
to be the C<gzip> compression level (between 1 and 9), but the use of
-constants is prefered:
+constants is preferred:
# write a gzip compressed file
$tar->write( 'out.tgz', COMPRESS_GZIP );
: do { seek $handle, 0, 0; local $/; <$handle> };
### make sure to close the handle if we created it
- close $handle unless ref($file);
+ if ( $file ne $handle ) {
+ unless( close $handle ) {
+ $self->_error( qq[Could not write tar] );
+ return;
+ }
+ }
return $rv;
}
my $prefix = $entry->prefix; $prefix = '' unless defined $prefix;
### remove the prefix from the file name
- ### not sure if this is still neeeded --kane
+ ### not sure if this is still needed --kane
### no it's not -- Archive::Tar::File->_new_from_file will take care of
### this for us. Even worse, this would break if we tried to add a file
### like x/x.
next;
}
+ eval {
+ if( utf8::is_utf8( $file )) {
+ utf8::encode( $file );
+ }
+ };
+
unless( -e $file || -l $file ) {
$self->_error( qq[No such file: '$file'] );
next;
devmajor, devminor, prefix, type. (On MacOS, the file's path and
modification times are converted to Unix equivalents.)
-Valid values for the file type are the following constants defined in
-Archive::Tar::Constants:
+Valid values for the file type are the following constants defined by
+Archive::Tar::Constant:
=over 4
=head2 $tar->error( [$BOOL] )
-Returns the current errorstring (usually, the last error reported).
+Returns the current error string (usually, the last error reported).
If a true value was specified, it will give the C<Carp::longmess>
equivalent of the error, in effect giving you a stacktrace.
and C<Archive::Tar> will call C<Cwd::cwd()> internally again.
-If you're using C<Archive::Tar>'s C<exract()> method, C<setcwd()> will
+If you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will
be called for you.
=cut
The second argument is used to indicate compression. You can either
compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
to be the C<gzip> compression level (between 1 and 9), but the use of
-constants is prefered:
+constants is preferred:
# write a gzip compressed file
Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist );
sub iter {
my $class = shift;
my $filename = shift or return;
- my $compressed = shift or 0;
+ my $compressed = shift || 0;
my $opts = shift || {};
### get a handle to read from.
) or return;
my @data;
+ my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ];
return sub {
return shift(@data) if @data; # more than one file returned?
return unless $handle; # handle exhausted?
### read data, should only return file
my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 });
@data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY';
+ if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
+ foreach(@data){
+ #may refine this heuristic for ON_UNIX?
+ if($_->linkname){
+ #is there a better slot to store/share it ?
+ $_->{'_archive'} = $CONSTRUCT_ARGS;
+ }
+ }
+ }
### return one piece of data
return shift(@data) if @data;
### data is exhausted, free the filehandle
undef $handle;
+ if(@$CONSTRUCT_ARGS == 4){
+ #free archive in memory
+ undef $CONSTRUCT_ARGS->[-1];
+ }
return;
};
}
argument it returns a list of hash references containing the requested
properties of each file. The following list of properties is
supported: full_path, name, size, mtime (last modified date), mode,
-uid, gid, linkname, uname, gname, devmajor, devminor, prefix.
+uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type.
See C<Archive::Tar::File> for details about supported properties.
croak("You have to install IO::String to support writing archives to strings");
}
+sub _symlinks_resolver{
+ my ($src, $trg) = @_;
+ my @src = split /[\/\\]/, $src;
+ my @trg = split /[\/\\]/, $trg;
+ pop @src; #strip out current object name
+ if(@trg and $trg[0] eq ''){
+ shift @trg;
+ #restart path from scratch
+ @src = ( );
+ }
+ foreach my $part ( @trg ){
+ next if $part eq '.'; #ignore current
+ if($part eq '..'){
+ #got to parent
+ pop @src;
+ }
+ else{
+ #append it
+ push @src, $part;
+ }
+ }
+ my $path = join('/', @src);
+ warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG;
+ return $path;
+}
+
1;
__END__
=head2 $Archive::Tar::ZERO_PAD_NUMBERS
This variable holds a boolean indicating if we will create
-zero padded numbers for C<size>, C<mtime> and C<checksum>.
+zero padded numbers for C<size>, C<mtime> and C<checksum>.
The default is C<0>, indicating that we will create space padded
numbers. Added for compatibility with C<busybox> implementations.
+=head2 Tuning the way RESOLVE_SYMLINK will works
+
+ You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable,
+ or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar.
+
+ Values can be one of the following:
+
+ none
+ Disable this mechanism and failed as it was in previous version (<1.88)
+
+ speed (default)
+ If you prefer speed
+ this will read again the whole archive using read() so all entries
+ will be available
+
+ memory
+ If you prefer memory
+
+ Limitation
+
+ It won't work for terminal, pipe or sockets or every non seekable source.
+
+=cut
+
=head1 FAQ
=over 4
By default, C<Archive::Tar> is in a completely POSIX-compatible
mode, which uses the POSIX-specification of C<tar> to store files.
-For paths greather than 100 characters, this is done using the
+For paths greater than 100 characters, this is done using the
C<POSIX header prefix>. Non-POSIX-compatible clients may not support
this part of the specification, and may only support the C<GNU Extended
Header> functionality. To facilitate those clients, you can set the
=head1 CAVEATS
-The AIX tar does not fill all unused space in the tar archive with 0x00.
+The AIX tar does not fill all unused space in the tar archive with 0x00.
This sometimes leads to warning messages from C<Archive::Tar>.
Invalid header block at offset nnn
AIX 5.3 TL8 SP8
AIX 5.3 TL9 SP5
AIX 5.3 TL10 SP2
-
+
AIX 6.1 TL0 SP11
AIX 6.1 TL1 SP7
AIX 6.1 TL2 SP6
AIX 6.1 TL3 SP3
-The IBM APAR number for this problem is IZ50240 (Reported component ID:
-5765G0300 / AIX 5.3). It is possible to get an ifix for that problem.
+The IBM APAR number for this problem is IZ50240 (Reported component ID:
+5765G0300 / AIX 5.3). It is possible to get an ifix for that problem.
If you need an ifix please contact your local IBM AIX support.
=head1 TODO
C<http://www.gnu.org/software/tar/manual/tar.html>
-=item The PAX format specication
+=item The PAX format specification
-The specifcation which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
+The specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>