1 package Archive::Tar::File;
6 use File::Spec::Unix ();
10 ### avoid circular use, so only require;
12 use Archive::Tar::Constant;
14 use vars qw[@ISA $VERSION];
15 #@ISA = qw[Archive::Tar];
18 ### set value to 1 to oct() it during the unpack ###
21 name => 0, # string A100
25 size => 0, # octal # cdrake - not *always* octal.. A12
26 mtime => 1, # octal A12
27 chksum => 1, # octal A8
28 type => 0, # character A1
29 linkname => 0, # string A100
30 magic => 0, # string A6
31 version => 0, # 2 bytes A2
32 uname => 0, # string A32
33 gname => 0, # string A32
34 devmajor => 1, # octal A8
35 devminor => 1, # octal A8
36 prefix => 0, # A155 x 12
38 ### end UNPACK items ###
39 raw => 0, # the raw data chunk
40 data => 0, # the data associated with the file --
41 # This might be very memory intensive
44 ### install get/set accessors for this object.
45 for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
46 my $key = $tmpl->[$i];
48 *{__PACKAGE__."::$key"} = sub {
50 $self->{$key} = $_[0] if @_;
52 ### just in case the key is not there or undef or something ###
61 Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
65 my @items = $tar->get_files;
67 print $_->name, ' ', $_->size, "\n" for @items;
69 print $object->get_content;
70 $object->replace_content('new content');
72 $object->rename( 'new/full/path/to/file.c' );
76 Archive::Tar::Files provides a neat little object layer for in-memory
77 extracted files. It's mostly used internally in Archive::Tar to tidy
78 up the code, but there's no reason users shouldn't use this API as
83 A lot of the methods in this package are accessors to the various
84 fields in the tar header:
98 The user id owning the file
102 The group id owning the file
110 Modification time. Adjusted to mac-time on MacOS if required
114 Checksum field for the tar header
118 File type -- numeric, but comparable to exported constants -- see
119 Archive::Tar's documentation
123 If the file is a symlink, the file it's pointing to
127 Tar magic string -- not useful for most users
131 Tar version string -- not useful for most users
135 The user name that owns the file
139 The group name that owns the file
143 Device major number in case of a special file
147 Device minor number in case of a special file
151 Any directory to prefix to the extraction path, if any
155 Raw tar header -- not useful for most users
161 =head2 Archive::Tar::File->new( file => $path )
163 Returns a new Archive::Tar::File object from an existing file.
165 Returns undef on failure.
167 =head2 Archive::Tar::File->new( data => $path, $data, $opt )
169 Returns a new Archive::Tar::File object from data.
171 C<$path> defines the file name (which need not exist), C<$data> the
172 file contents, and C<$opt> is a reference to a hash of attributes
173 which may be used to override the default attributes (fields in the
174 tar header), which are described above in the Accessors section.
176 Returns undef on failure.
178 =head2 Archive::Tar::File->new( chunk => $chunk )
180 Returns a new Archive::Tar::File object from a raw 512-byte tar
183 Returns undef on failure.
191 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
192 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
193 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
199 ### copies the data, creates a clone ###
202 return bless { %$self }, ref $self;
205 sub _new_from_chunk {
207 my $chunk = shift or return; # 512 bytes of tar header
210 ### filter any arguments on defined-ness of values.
211 ### this allows overriding from what the tar-header is saying
212 ### about this tar-entry. Particularly useful for @LongLink files
213 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
215 ### makes it start at 0 actually... :) ###
218 my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]); # cdrake
219 ($_)=($_=~/^([^\0]*)/) unless($s eq 'size'); # cdrake
220 $s=> $v ? oct $_ : $_ # cdrake
221 # $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ # removed by cdrake - mucks up binary sizes >8gb
222 } unpack( UNPACK, $chunk ); # cdrake
223 # } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); # old - replaced now by cdrake
226 if(substr($entry{'size'}, 0, 1) eq "\x80") { # binary size extension for files >8gigs (> octal 77777777777777) # cdrake
227 my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64); # Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake
229 ($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'}; # cdrake
233 my $obj = bless { %entry, %args }, $class;
235 ### magic is a filetype string.. it should have something like 'ustar' or
236 ### something similar... if the chunk is garbage, skip it
237 return unless $obj->magic !~ /\W/;
239 ### store the original chunk ###
242 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
243 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
254 ### path has to at least exist
255 return unless defined $path;
257 my $type = __PACKAGE__->_filetype($path);
261 unless ($type == DIR ) {
262 my $fh = IO::File->new;
264 unless( $fh->open($path) ) {
265 ### dangling symlinks are fine, stop reading but continue
266 ### creating the object
267 last READ if $type == SYMLINK;
269 ### otherwise, return from this function --
270 ### anything that's *not* a symlink should be
275 ### binmode needed to read files properly on win32 ###
277 $data = do { local $/; <$fh> };
282 my @items = qw[mode uid gid size mtime];
283 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
286 ### VMS has two UID modes, traditional and POSIX. Normally POSIX is
287 ### not used. We currently do not have an easy way to see if we are in
288 ### POSIX mode. In traditional mode, the UID is actually the VMS UIC.
289 ### The VMS UIC has the upper 16 bits is the GID, which in many cases
290 ### the VMS UIC will be larger than 209715, the largest that TAR can
291 ### handle. So for now, assume it is traditional if the UID is larger
294 if ($hash{uid} > 0x10000) {
295 $hash{uid} = $hash{uid} & 0xFFFF;
298 ### The file length from stat() is the physical length of the file
299 ### However the amount of data read in may be more for some file types.
300 ### Fixed length files are read past the logical EOF to end of the block
301 ### containing. Other file types get expanded on read because record
302 ### delimiters are added.
304 my $data_len = length $data;
305 $hash{size} = $data_len if $hash{size} < $data_len;
308 ### you *must* set size == 0 on symlinks, or the next entry will be
309 ### though of as the contents of the symlink, which is wrong.
310 ### this fixes bug #7937
311 $hash{size} = 0 if ($type == DIR or $type == SYMLINK);
312 $hash{mtime} -= TIME_OFFSET;
314 ### strip the high bits off the mode, which we don't need to store
315 $hash{mode} = STRIP_MODE->( $hash{mode} );
318 ### probably requires some file path munging here ... ###
319 ### name and prefix are set later
325 linkname => ($type == SYMLINK and CAN_READLINK)
329 version => TAR_VERSION,
330 uname => UNAME->( $hash{uid} ),
331 gname => GNAME->( $hash{gid} ),
332 devmajor => 0, # not handled
333 devminor => 0, # not handled
340 ### fix up the prefix and file from the path
341 my($prefix,$file) = $obj->_prefix_and_file( $path );
342 $obj->prefix( $prefix );
350 my $path = shift; return unless defined $path;
351 my $data = shift; return unless defined $data;
360 size => length $data,
361 mtime => time - TIME_OFFSET,
366 version => TAR_VERSION,
367 uname => UNAME->( UID ),
368 gname => GNAME->( GID ),
374 ### overwrite with user options, if provided ###
375 if( $opt and ref $opt eq 'HASH' ) {
376 for my $key ( keys %$opt ) {
378 ### don't write bogus options ###
379 next unless exists $obj->{$key};
380 $obj->{$key} = $opt->{$key};
386 ### fix up the prefix and file from the path
387 my($prefix,$file) = $obj->_prefix_and_file( $path );
388 $obj->prefix( $prefix );
394 sub _prefix_and_file {
398 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
399 my @dirs = File::Spec->splitdir( $dirs );
401 ### so sometimes the last element is '' -- probably when trailing
402 ### dir slashes are encountered... this is of course pointless,
404 pop @dirs while @dirs and not length $dirs[-1];
406 ### if it's a directory, then $file might be empty
407 $file = pop @dirs if $self->is_dir and not length $file;
409 ### splitting ../ gives you the relative path in native syntax
410 map { $_ = '..' if $_ eq '-' } @dirs if ON_VMS;
412 my $prefix = File::Spec::Unix->catdir(
413 grep { length } $vol, @dirs
415 return( $prefix, $file );
422 return unless defined $file;
424 return SYMLINK if (-l $file); # Symlink
426 return FILE if (-f _); # Plain file
428 return DIR if (-d _); # Directory
430 return FIFO if (-p _); # Named pipe
432 return SOCKET if (-S _); # Socket
434 return BLOCKDEV if (-b _); # Block special
436 return CHARDEV if (-c _); # Character special
438 ### shouldn't happen, this is when making archives, not reading ###
439 return LONGLINK if ( $file eq LONGLINK_NAME );
441 return UNKNOWN; # Something else (like what?)
445 ### this method 'downgrades' a file to plain file -- this is used for
446 ### symlinks when FOLLOW_SYMLINKS is true.
447 sub _downgrade_to_plainfile {
449 $entry->type( FILE );
450 $entry->mode( MODE );
451 $entry->linkname('');
456 =head2 $bool = $file->extract( [ $alternative_name ] )
458 Extract this object, optionally to an alternative name.
460 See C<< Archive::Tar->extract_file >> for details.
462 Returns true on success and false on failure.
469 local $Carp::CarpLevel += 1;
471 return Archive::Tar->_extract_file( $self, @_ );
474 =head2 $path = $file->full_path
476 Returns the full path from the tar header; this is basically a
477 concatenation of the C<prefix> and C<name> fields.
484 ### if prefix field is empty
485 return $self->name unless defined $self->prefix and length $self->prefix;
487 ### or otherwise, catfile'd
488 return File::Spec::Unix->catfile( $self->prefix, $self->name );
492 =head2 $bool = $file->validate
494 Done by Archive::Tar internally when reading the tar file:
495 validate the header against the checksum to ensure integer tar file.
497 Returns true on success, false on failure
504 my $raw = $self->raw;
506 ### don't know why this one is different from the one we /write/ ###
507 substr ($raw, 148, 8) = " ";
509 ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
510 ### like GNU tar does. See here for details:
511 ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
512 ### so we do both a signed AND unsigned validate. if one succeeds, that's
514 return ( (unpack ("%16C*", $raw) == $self->chksum)
515 or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
518 =head2 $bool = $file->has_content
520 Returns a boolean to indicate whether the current object has content.
521 Some special files like directories and so on never will have any
522 content. This method is mainly to make sure you don't get warnings
523 for using uninitialized values when looking at an object's content.
529 return defined $self->data() && length $self->data() ? 1 : 0;
532 =head2 $content = $file->get_content
534 Returns the current content for the in-memory file
543 =head2 $cref = $file->get_content_by_ref
545 Returns the current content for the in-memory file as a scalar
546 reference. Normal users won't need this, but it will save memory if
547 you are dealing with very large data files in your tar archive, since
548 it will pass the contents by reference, rather than make a copy of it
553 sub get_content_by_ref {
556 return \$self->{data};
559 =head2 $bool = $file->replace_content( $content )
561 Replace the current content of the file with the new content. This
562 only affects the in-memory archive, not the on-disk version until
565 Returns true on success, false on failure.
569 sub replace_content {
571 my $data = shift || '';
573 $self->data( $data );
574 $self->size( length $data );
578 =head2 $bool = $file->rename( $new_name )
580 Rename the current file to $new_name.
582 Note that you must specify a Unix path for $new_name, since per tar
583 standard, all files in the archive must be Unix paths.
585 Returns true on success and false on failure.
593 return unless defined $path;
595 my ($prefix,$file) = $self->_prefix_and_file( $path );
597 $self->name( $file );
598 $self->prefix( $prefix );
603 =head2 $bool = $file->chmod $mode)
605 Change mode of $file to $mode. The mode can be a string or a number
606 which is interpreted as octal whether or not a leading 0 is given.
608 Returns true on success and false on failure.
614 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
615 $self->{mode} = oct($mode);
619 =head2 $bool = $file->chown( $user [, $group])
621 Change owner of $file to $user. If a $group is given that is changed
622 as well. You can also pass a single parameter with a colon separating the
623 use and group as in 'root:wheel'.
625 Returns true on success and false on failure.
632 return unless defined $uname;
634 if (-1 != index($uname, ':')) {
635 ($uname, $gname) = split(/:/, $uname);
637 $gname = shift if @_ > 0;
640 $self->uname( $uname );
641 $self->gname( $gname ) if $gname;
645 =head1 Convenience methods
647 To quickly check the type of a C<Archive::Tar::File> object, you can
648 use the following methods:
654 Returns true if the file is of type C<file>
658 Returns true if the file is of type C<dir>
660 =item $file->is_hardlink
662 Returns true if the file is of type C<hardlink>
664 =item $file->is_symlink
666 Returns true if the file is of type C<symlink>
668 =item $file->is_chardev
670 Returns true if the file is of type C<chardev>
672 =item $file->is_blockdev
674 Returns true if the file is of type C<blockdev>
678 Returns true if the file is of type C<fifo>
680 =item $file->is_socket
682 Returns true if the file is of type C<socket>
684 =item $file->is_longlink
686 Returns true if the file is of type C<LongLink>.
687 Should not happen after a successful C<read>.
689 =item $file->is_label
691 Returns true if the file is of type C<Label>.
692 Should not happen after a successful C<read>.
694 =item $file->is_unknown
696 Returns true if the file type is C<unknown>
702 #stupid perl5.5.3 needs to warn if it's not numeric
703 sub is_file { local $^W; FILE == $_[0]->type }
704 sub is_dir { local $^W; DIR == $_[0]->type }
705 sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
706 sub is_symlink { local $^W; SYMLINK == $_[0]->type }
707 sub is_chardev { local $^W; CHARDEV == $_[0]->type }
708 sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
709 sub is_fifo { local $^W; FIFO == $_[0]->type }
710 sub is_socket { local $^W; SOCKET == $_[0]->type }
711 sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
712 sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
713 sub is_label { local $^W; LABEL eq $_[0]->type }