1 package Archive::Tar::File;
6 use File::Spec::Unix ();
10 ### avoid circular use, so only require;
12 use Archive::Tar::Constant;
14 use vars qw[@ISA $VERSION];
15 #@ISA = qw[Archive::Tar];
18 ### set value to 1 to oct() it during the unpack ###
27 type => 0, # character
28 linkname => 0, # string
30 version => 0, # 2 bytes
33 devmajor => 1, # octal
34 devminor => 1, # octal
37 ### end UNPACK items ###
38 raw => 0, # the raw data chunk
39 data => 0, # the data associated with the file --
40 # This might be very memory intensive
43 ### install get/set accessors for this object.
44 for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
45 my $key = $tmpl->[$i];
47 *{__PACKAGE__."::$key"} = sub {
49 $self->{$key} = $_[0] if @_;
51 ### just in case the key is not there or undef or something ###
60 Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
64 my @items = $tar->get_files;
66 print $_->name, ' ', $_->size, "\n" for @items;
68 print $object->get_content;
69 $object->replace_content('new content');
71 $object->rename( 'new/full/path/to/file.c' );
75 Archive::Tar::Files provides a neat little object layer for in-memory
76 extracted files. It's mostly used internally in Archive::Tar to tidy
77 up the code, but there's no reason users shouldn't use this API as
82 A lot of the methods in this package are accessors to the various
83 fields in the tar header:
97 The user id owning the file
101 The group id owning the file
109 Modification time. Adjusted to mac-time on MacOS if required
113 Checksum field for the tar header
117 File type -- numeric, but comparable to exported constants -- see
118 Archive::Tar's documentation
122 If the file is a symlink, the file it's pointing to
126 Tar magic string -- not useful for most users
130 Tar version string -- not useful for most users
134 The user name that owns the file
138 The group name that owns the file
142 Device major number in case of a special file
146 Device minor number in case of a special file
150 Any directory to prefix to the extraction path, if any
154 Raw tar header -- not useful for most users
160 =head2 Archive::Tar::File->new( file => $path )
162 Returns a new Archive::Tar::File object from an existing file.
164 Returns undef on failure.
166 =head2 Archive::Tar::File->new( data => $path, $data, $opt )
168 Returns a new Archive::Tar::File object from data.
170 C<$path> defines the file name (which need not exist), C<$data> the
171 file contents, and C<$opt> is a reference to a hash of attributes
172 which may be used to override the default attributes (fields in the
173 tar header), which are described above in the Accessors section.
175 Returns undef on failure.
177 =head2 Archive::Tar::File->new( chunk => $chunk )
179 Returns a new Archive::Tar::File object from a raw 512-byte tar
182 Returns undef on failure.
190 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
191 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
192 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
198 ### copies the data, creates a clone ###
201 return bless { %$self }, ref $self;
204 sub _new_from_chunk {
206 my $chunk = shift or return; # 512 bytes of tar header
209 ### filter any arguments on defined-ness of values.
210 ### this allows overriding from what the tar-header is saying
211 ### about this tar-entry. Particularly useful for @LongLink files
212 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
214 ### makes it start at 0 actually... :) ###
217 $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_
218 } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );
220 my $obj = bless { %entry, %args }, $class;
222 ### magic is a filetype string.. it should have something like 'ustar' or
223 ### something similar... if the chunk is garbage, skip it
224 return unless $obj->magic !~ /\W/;
226 ### store the original chunk ###
229 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
230 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
241 ### path has to at least exist
242 return unless defined $path;
244 my $type = __PACKAGE__->_filetype($path);
248 unless ($type == DIR ) {
249 my $fh = IO::File->new;
251 unless( $fh->open($path) ) {
252 ### dangling symlinks are fine, stop reading but continue
253 ### creating the object
254 last READ if $type == SYMLINK;
256 ### otherwise, return from this function --
257 ### anything that's *not* a symlink should be
262 ### binmode needed to read files properly on win32 ###
264 $data = do { local $/; <$fh> };
269 my @items = qw[mode uid gid size mtime];
270 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
273 ### VMS has two UID modes, traditional and POSIX. Normally POSIX is
274 ### not used. We currently do not have an easy way to see if we are in
275 ### POSIX mode. In traditional mode, the UID is actually the VMS UIC.
276 ### The VMS UIC has the upper 16 bits is the GID, which in many cases
277 ### the VMS UIC will be larger than 209715, the largest that TAR can
278 ### handle. So for now, assume it is traditional if the UID is larger
281 if ($hash{uid} > 0x10000) {
282 $hash{uid} = $hash{uid} & 0xFFFF;
285 ### The file length from stat() is the physical length of the file
286 ### However the amount of data read in may be more for some file types.
287 ### Fixed length files are read past the logical EOF to end of the block
288 ### containing. Other file types get expanded on read because record
289 ### delimiters are added.
291 my $data_len = length $data;
292 $hash{size} = $data_len if $hash{size} < $data_len;
295 ### you *must* set size == 0 on symlinks, or the next entry will be
296 ### though of as the contents of the symlink, which is wrong.
297 ### this fixes bug #7937
298 $hash{size} = 0 if ($type == DIR or $type == SYMLINK);
299 $hash{mtime} -= TIME_OFFSET;
301 ### strip the high bits off the mode, which we don't need to store
302 $hash{mode} = STRIP_MODE->( $hash{mode} );
305 ### probably requires some file path munging here ... ###
306 ### name and prefix are set later
312 linkname => ($type == SYMLINK and CAN_READLINK)
316 version => TAR_VERSION,
317 uname => UNAME->( $hash{uid} ),
318 gname => GNAME->( $hash{gid} ),
319 devmajor => 0, # not handled
320 devminor => 0, # not handled
327 ### fix up the prefix and file from the path
328 my($prefix,$file) = $obj->_prefix_and_file( $path );
329 $obj->prefix( $prefix );
337 my $path = shift; return unless defined $path;
338 my $data = shift; return unless defined $data;
347 size => length $data,
348 mtime => time - TIME_OFFSET,
353 version => TAR_VERSION,
354 uname => UNAME->( UID ),
355 gname => GNAME->( GID ),
361 ### overwrite with user options, if provided ###
362 if( $opt and ref $opt eq 'HASH' ) {
363 for my $key ( keys %$opt ) {
365 ### don't write bogus options ###
366 next unless exists $obj->{$key};
367 $obj->{$key} = $opt->{$key};
373 ### fix up the prefix and file from the path
374 my($prefix,$file) = $obj->_prefix_and_file( $path );
375 $obj->prefix( $prefix );
381 sub _prefix_and_file {
385 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
386 my @dirs = File::Spec->splitdir( $dirs );
388 ### so sometimes the last element is '' -- probably when trailing
389 ### dir slashes are encountered... this is of course pointless,
391 pop @dirs while @dirs and not length $dirs[-1];
393 ### if it's a directory, then $file might be empty
394 $file = pop @dirs if $self->is_dir and not length $file;
396 ### splitting ../ gives you the relative path in native syntax
397 map { $_ = '..' if $_ eq '-' } @dirs if ON_VMS;
399 my $prefix = File::Spec::Unix->catdir(
400 grep { length } $vol, @dirs
402 return( $prefix, $file );
409 return unless defined $file;
411 return SYMLINK if (-l $file); # Symlink
413 return FILE if (-f _); # Plain file
415 return DIR if (-d _); # Directory
417 return FIFO if (-p _); # Named pipe
419 return SOCKET if (-S _); # Socket
421 return BLOCKDEV if (-b _); # Block special
423 return CHARDEV if (-c _); # Character special
425 ### shouldn't happen, this is when making archives, not reading ###
426 return LONGLINK if ( $file eq LONGLINK_NAME );
428 return UNKNOWN; # Something else (like what?)
432 ### this method 'downgrades' a file to plain file -- this is used for
433 ### symlinks when FOLLOW_SYMLINKS is true.
434 sub _downgrade_to_plainfile {
436 $entry->type( FILE );
437 $entry->mode( MODE );
438 $entry->linkname('');
443 =head2 $bool = $file->extract( [ $alternative_name ] )
445 Extract this object, optionally to an alternative name.
447 See C<< Archive::Tar->extract_file >> for details.
449 Returns true on success and false on failure.
456 local $Carp::CarpLevel += 1;
458 return Archive::Tar->_extract_file( $self, @_ );
461 =head2 $path = $file->full_path
463 Returns the full path from the tar header; this is basically a
464 concatenation of the C<prefix> and C<name> fields.
471 ### if prefix field is emtpy
472 return $self->name unless defined $self->prefix and length $self->prefix;
474 ### or otherwise, catfile'd
475 return File::Spec::Unix->catfile( $self->prefix, $self->name );
479 =head2 $bool = $file->validate
481 Done by Archive::Tar internally when reading the tar file:
482 validate the header against the checksum to ensure integer tar file.
484 Returns true on success, false on failure
491 my $raw = $self->raw;
493 ### don't know why this one is different from the one we /write/ ###
494 substr ($raw, 148, 8) = " ";
496 ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
497 ### like GNU tar does. See here for details:
498 ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
499 ### so we do both a signed AND unsigned validate. if one succeeds, that's
501 return ( (unpack ("%16C*", $raw) == $self->chksum)
502 or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
505 =head2 $bool = $file->has_content
507 Returns a boolean to indicate whether the current object has content.
508 Some special files like directories and so on never will have any
509 content. This method is mainly to make sure you don't get warnings
510 for using uninitialized values when looking at an object's content.
516 return defined $self->data() && length $self->data() ? 1 : 0;
519 =head2 $content = $file->get_content
521 Returns the current content for the in-memory file
530 =head2 $cref = $file->get_content_by_ref
532 Returns the current content for the in-memory file as a scalar
533 reference. Normal users won't need this, but it will save memory if
534 you are dealing with very large data files in your tar archive, since
535 it will pass the contents by reference, rather than make a copy of it
540 sub get_content_by_ref {
543 return \$self->{data};
546 =head2 $bool = $file->replace_content( $content )
548 Replace the current content of the file with the new content. This
549 only affects the in-memory archive, not the on-disk version until
552 Returns true on success, false on failure.
556 sub replace_content {
558 my $data = shift || '';
560 $self->data( $data );
561 $self->size( length $data );
565 =head2 $bool = $file->rename( $new_name )
567 Rename the current file to $new_name.
569 Note that you must specify a Unix path for $new_name, since per tar
570 standard, all files in the archive must be Unix paths.
572 Returns true on success and false on failure.
580 return unless defined $path;
582 my ($prefix,$file) = $self->_prefix_and_file( $path );
584 $self->name( $file );
585 $self->prefix( $prefix );
590 =head1 Convenience methods
592 To quickly check the type of a C<Archive::Tar::File> object, you can
593 use the following methods:
599 Returns true if the file is of type C<file>
603 Returns true if the file is of type C<dir>
605 =item $file->is_hardlink
607 Returns true if the file is of type C<hardlink>
609 =item $file->is_symlink
611 Returns true if the file is of type C<symlink>
613 =item $file->is_chardev
615 Returns true if the file is of type C<chardev>
617 =item $file->is_blockdev
619 Returns true if the file is of type C<blockdev>
623 Returns true if the file is of type C<fifo>
625 =item $file->is_socket
627 Returns true if the file is of type C<socket>
629 =item $file->is_longlink
631 Returns true if the file is of type C<LongLink>.
632 Should not happen after a successful C<read>.
634 =item $file->is_label
636 Returns true if the file is of type C<Label>.
637 Should not happen after a successful C<read>.
639 =item $file->is_unknown
641 Returns true if the file type is C<unknown>
647 #stupid perl5.5.3 needs to warn if it's not numeric
648 sub is_file { local $^W; FILE == $_[0]->type }
649 sub is_dir { local $^W; DIR == $_[0]->type }
650 sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
651 sub is_symlink { local $^W; SYMLINK == $_[0]->type }
652 sub is_chardev { local $^W; CHARDEV == $_[0]->type }
653 sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
654 sub is_fifo { local $^W; FIFO == $_[0]->type }
655 sub is_socket { local $^W; SOCKET == $_[0]->type }
656 sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
657 sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
658 sub is_label { local $^W; LABEL eq $_[0]->type }