cpan/Archive-Tar/lib/Archive/Tar/File.pm

   1 package Archive::Tar::File;
   2 use strict;
   3
   4 use Carp                ();
   5 use IO::File;
   6 use File::Spec::Unix    ();
   7 use File::Spec          ();
   8 use File::Basename      ();
   9
  10 ### avoid circular use, so only require;
  11 require Archive::Tar;
  12 use Archive::Tar::Constant;
  13
  14 use vars qw[@ISA $VERSION];
  15 #@ISA        = qw[Archive::Tar];
  16 $VERSION    = '1.74';
  17
  18 ### set value to 1 to oct() it during the unpack ###
  19 my $tmpl = [
  20         name        => 0,   # string
  21         mode        => 1,   # octal
  22         uid         => 1,   # octal
  23         gid         => 1,   # octal
  24         size        => 1,   # octal
  25         mtime       => 1,   # octal
  26         chksum      => 1,   # octal
  27         type        => 0,   # character
  28         linkname    => 0,   # string
  29         magic       => 0,   # string
  30         version     => 0,   # 2 bytes
  31         uname       => 0,   # string
  32         gname       => 0,   # string
  33         devmajor    => 1,   # octal
  34         devminor    => 1,   # octal
  35         prefix      => 0,
  36
  37 ### end UNPACK items ###
  38         raw         => 0,   # the raw data chunk
  39         data        => 0,   # the data associated with the file --
  40                             # This  might be very memory intensive
  41 ];
  42
  43 ### install get/set accessors for this object.
  44 for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
  45     my $key = $tmpl->[$i];
  46     no strict 'refs';
  47     *{__PACKAGE__."::$key"} = sub {
  48         my $self = shift;
  49         $self->{$key} = $_[0] if @_;
  50
  51         ### just in case the key is not there or undef or something ###
  52         {   local $^W = 0;
  53             return $self->{$key};
  54         }
  55     }
  56 }
  57
  58 =head1 NAME
  59
  60 Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
  61
  62 =head1 SYNOPSIS
  63
  64     my @items = $tar->get_files;
  65
  66     print $_->name, ' ', $_->size, "\n" for @items;
  67
  68     print $object->get_content;
  69     $object->replace_content('new content');
  70
  71     $object->rename( 'new/full/path/to/file.c' );
  72
  73 =head1 DESCRIPTION
  74
  75 Archive::Tar::Files provides a neat little object layer for in-memory
  76 extracted files. It's mostly used internally in Archive::Tar to tidy
  77 up the code, but there's no reason users shouldn't use this API as
  78 well.
  79
  80 =head2 Accessors
  81
  82 A lot of the methods in this package are accessors to the various
  83 fields in the tar header:
  84
  85 =over 4
  86
  87 =item name
  88
  89 The file's name
  90
  91 =item mode
  92
  93 The file's mode
  94
  95 =item uid
  96
  97 The user id owning the file
  98
  99 =item gid
 100
 101 The group id owning the file
 102
 103 =item size
 104
 105 File size in bytes
 106
 107 =item mtime
 108
 109 Modification time. Adjusted to mac-time on MacOS if required
 110
 111 =item chksum
 112
 113 Checksum field for the tar header
 114
 115 =item type
 116
 117 File type -- numeric, but comparable to exported constants -- see
 118 Archive::Tar's documentation
 119
 120 =item linkname
 121
 122 If the file is a symlink, the file it's pointing to
 123
 124 =item magic
 125
 126 Tar magic string -- not useful for most users
 127
 128 =item version
 129
 130 Tar version string -- not useful for most users
 131
 132 =item uname
 133
 134 The user name that owns the file
 135
 136 =item gname
 137
 138 The group name that owns the file
 139
 140 =item devmajor
 141
 142 Device major number in case of a special file
 143
 144 =item devminor
 145
 146 Device minor number in case of a special file
 147
 148 =item prefix
 149
 150 Any directory to prefix to the extraction path, if any
 151
 152 =item raw
 153
 154 Raw tar header -- not useful for most users
 155
 156 =back
 157
 158 =head1 Methods
 159
 160 =head2 Archive::Tar::File->new( file => $path )
 161
 162 Returns a new Archive::Tar::File object from an existing file.
 163
 164 Returns undef on failure.
 165
 166 =head2 Archive::Tar::File->new( data => $path, $data, $opt )
 167
 168 Returns a new Archive::Tar::File object from data.
 169
 170 C<$path> defines the file name (which need not exist), C<$data> the
 171 file contents, and C<$opt> is a reference to a hash of attributes
 172 which may be used to override the default attributes (fields in the
 173 tar header), which are described above in the Accessors section.
 174
 175 Returns undef on failure.
 176
 177 =head2 Archive::Tar::File->new( chunk => $chunk )
 178
 179 Returns a new Archive::Tar::File object from a raw 512-byte tar
 180 archive chunk.
 181
 182 Returns undef on failure.
 183
 184 =cut
 185
 186 sub new {
 187     my $class   = shift;
 188     my $what    = shift;
 189
 190     my $obj =   ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
 191                 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
 192                 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
 193                 undef;
 194
 195     return $obj;
 196 }
 197
 198 ### copies the data, creates a clone ###
 199 sub clone {
 200     my $self = shift;
 201     return bless { %$self }, ref $self;
 202 }
 203
 204 sub _new_from_chunk {
 205     my $class = shift;
 206     my $chunk = shift or return;    # 512 bytes of tar header
 207     my %hash  = @_;
 208
 209     ### filter any arguments on defined-ness of values.
 210     ### this allows overriding from what the tar-header is saying
 211     ### about this tar-entry. Particularly useful for @LongLink files
 212     my %args  = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
 213
 214     ### makes it start at 0 actually... :) ###
 215     my $i = -1;
 216     my %entry = map {
 217         $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_
 218     } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );
 219
 220     my $obj = bless { %entry, %args }, $class;
 221
 222         ### magic is a filetype string.. it should have something like 'ustar' or
 223         ### something similar... if the chunk is garbage, skip it
 224         return unless $obj->magic !~ /\W/;
 225
 226     ### store the original chunk ###
 227     $obj->raw( $chunk );
 228
 229     $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
 230     $obj->type(DIR)  if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
 231
 232
 233     return $obj;
 234
 235 }
 236
 237 sub _new_from_file {
 238     my $class       = shift;
 239     my $path        = shift;
 240
 241     ### path has to at least exist
 242     return unless defined $path;
 243
 244     my $type        = __PACKAGE__->_filetype($path);
 245     my $data        = '';
 246
 247     READ: {
 248         unless ($type == DIR ) {
 249             my $fh = IO::File->new;
 250
 251             unless( $fh->open($path) ) {
 252                 ### dangling symlinks are fine, stop reading but continue
 253                 ### creating the object
 254                 last READ if $type == SYMLINK;
 255
 256                 ### otherwise, return from this function --
 257                 ### anything that's *not* a symlink should be
 258                 ### resolvable
 259                 return;
 260             }
 261
 262             ### binmode needed to read files properly on win32 ###
 263             binmode $fh;
 264             $data = do { local $/; <$fh> };
 265             close $fh;
 266         }
 267     }
 268
 269     my @items       = qw[mode uid gid size mtime];
 270     my %hash        = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
 271
 272     if (ON_VMS) {
 273         ### VMS has two UID modes, traditional and POSIX.  Normally POSIX is
 274         ### not used.  We currently do not have an easy way to see if we are in
 275         ### POSIX mode.  In traditional mode, the UID is actually the VMS UIC.
 276         ### The VMS UIC has the upper 16 bits is the GID, which in many cases
 277         ### the VMS UIC will be larger than 209715, the largest that TAR can
 278         ### handle.  So for now, assume it is traditional if the UID is larger
 279         ### than 0x10000.
 280
 281         if ($hash{uid} > 0x10000) {
 282             $hash{uid} = $hash{uid} & 0xFFFF;
 283         }
 284
 285         ### The file length from stat() is the physical length of the file
 286         ### However the amount of data read in may be more for some file types.
 287         ### Fixed length files are read past the logical EOF to end of the block
 288         ### containing.  Other file types get expanded on read because record
 289         ### delimiters are added.
 290
 291         my $data_len = length $data;
 292         $hash{size} = $data_len if $hash{size} < $data_len;
 293
 294     }
 295     ### you *must* set size == 0 on symlinks, or the next entry will be
 296     ### though of as the contents of the symlink, which is wrong.
 297     ### this fixes bug #7937
 298     $hash{size}     = 0 if ($type == DIR or $type == SYMLINK);
 299     $hash{mtime}    -= TIME_OFFSET;
 300
 301     ### strip the high bits off the mode, which we don't need to store
 302     $hash{mode}     = STRIP_MODE->( $hash{mode} );
 303
 304
 305     ### probably requires some file path munging here ... ###
 306     ### name and prefix are set later
 307     my $obj = {
 308         %hash,
 309         name        => '',
 310         chksum      => CHECK_SUM,
 311         type        => $type,
 312         linkname    => ($type == SYMLINK and CAN_READLINK)
 313                             ? readlink $path
 314                             : '',
 315         magic       => MAGIC,
 316         version     => TAR_VERSION,
 317         uname       => UNAME->( $hash{uid} ),
 318         gname       => GNAME->( $hash{gid} ),
 319         devmajor    => 0,   # not handled
 320         devminor    => 0,   # not handled
 321         prefix      => '',
 322         data        => $data,
 323     };
 324
 325     bless $obj, $class;
 326
 327     ### fix up the prefix and file from the path
 328     my($prefix,$file) = $obj->_prefix_and_file( $path );
 329     $obj->prefix( $prefix );
 330     $obj->name( $file );
 331
 332     return $obj;
 333 }
 334
 335 sub _new_from_data {
 336     my $class   = shift;
 337     my $path    = shift;    return unless defined $path;
 338     my $data    = shift;    return unless defined $data;
 339     my $opt     = shift;
 340
 341     my $obj = {
 342         data        => $data,
 343         name        => '',
 344         mode        => MODE,
 345         uid         => UID,
 346         gid         => GID,
 347         size        => length $data,
 348         mtime       => time - TIME_OFFSET,
 349         chksum      => CHECK_SUM,
 350         type        => FILE,
 351         linkname    => '',
 352         magic       => MAGIC,
 353         version     => TAR_VERSION,
 354         uname       => UNAME->( UID ),
 355         gname       => GNAME->( GID ),
 356         devminor    => 0,
 357         devmajor    => 0,
 358         prefix      => '',
 359     };
 360
 361     ### overwrite with user options, if provided ###
 362     if( $opt and ref $opt eq 'HASH' ) {
 363         for my $key ( keys %$opt ) {
 364
 365             ### don't write bogus options ###
 366             next unless exists $obj->{$key};
 367             $obj->{$key} = $opt->{$key};
 368         }
 369     }
 370
 371     bless $obj, $class;
 372
 373     ### fix up the prefix and file from the path
 374     my($prefix,$file) = $obj->_prefix_and_file( $path );
 375     $obj->prefix( $prefix );
 376     $obj->name( $file );
 377
 378     return $obj;
 379 }
 380
 381 sub _prefix_and_file {
 382     my $self = shift;
 383     my $path = shift;
 384
 385     my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
 386     my @dirs = File::Spec->splitdir( $dirs );
 387
 388     ### so sometimes the last element is '' -- probably when trailing
 389     ### dir slashes are encountered... this is of course pointless,
 390     ### so remove it
 391     pop @dirs while @dirs and not length $dirs[-1];
 392
 393     ### if it's a directory, then $file might be empty
 394     $file = pop @dirs if $self->is_dir and not length $file;
 395
 396     ### splitting ../ gives you the relative path in native syntax
 397     map { $_ = '..' if $_  eq '-' } @dirs if ON_VMS;
 398
 399     my $prefix = File::Spec::Unix->catdir(
 400                         grep { length } $vol, @dirs
 401                     );
 402     return( $prefix, $file );
 403 }
 404
 405 sub _filetype {
 406     my $self = shift;
 407     my $file = shift;
 408
 409     return unless defined $file;
 410
 411     return SYMLINK  if (-l $file);      # Symlink
 412
 413     return FILE     if (-f _);          # Plain file
 414
 415     return DIR      if (-d _);          # Directory
 416
 417     return FIFO     if (-p _);          # Named pipe
 418
 419     return SOCKET   if (-S _);          # Socket
 420
 421     return BLOCKDEV if (-b _);          # Block special
 422
 423     return CHARDEV  if (-c _);          # Character special
 424
 425     ### shouldn't happen, this is when making archives, not reading ###
 426     return LONGLINK if ( $file eq LONGLINK_NAME );
 427
 428     return UNKNOWN;                         # Something else (like what?)
 429
 430 }
 431
 432 ### this method 'downgrades' a file to plain file -- this is used for
 433 ### symlinks when FOLLOW_SYMLINKS is true.
 434 sub _downgrade_to_plainfile {
 435     my $entry = shift;
 436     $entry->type( FILE );
 437     $entry->mode( MODE );
 438     $entry->linkname('');
 439
 440     return 1;
 441 }
 442
 443 =head2 $bool = $file->extract( [ $alternative_name ] )
 444
 445 Extract this object, optionally to an alternative name.
 446
 447 See C<< Archive::Tar->extract_file >> for details.
 448
 449 Returns true on success and false on failure.
 450
 451 =cut
 452
 453 sub extract {
 454     my $self = shift;
 455
 456     local $Carp::CarpLevel += 1;
 457
 458     return Archive::Tar->_extract_file( $self, @_ );
 459 }
 460
 461 =head2 $path = $file->full_path
 462
 463 Returns the full path from the tar header; this is basically a
 464 concatenation of the C<prefix> and C<name> fields.
 465
 466 =cut
 467
 468 sub full_path {
 469     my $self = shift;
 470
 471     ### if prefix field is emtpy
 472     return $self->name unless defined $self->prefix and length $self->prefix;
 473
 474     ### or otherwise, catfile'd
 475     return File::Spec::Unix->catfile( $self->prefix, $self->name );
 476 }
 477
 478
 479 =head2 $bool = $file->validate
 480
 481 Done by Archive::Tar internally when reading the tar file:
 482 validate the header against the checksum to ensure integer tar file.
 483
 484 Returns true on success, false on failure
 485
 486 =cut
 487
 488 sub validate {
 489     my $self = shift;
 490
 491     my $raw = $self->raw;
 492
 493     ### don't know why this one is different from the one we /write/ ###
 494     substr ($raw, 148, 8) = "        ";
 495
 496     ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
 497     ### like GNU tar does. See here for details:
 498     ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
 499     ### so we do both a signed AND unsigned validate. if one succeeds, that's
 500     ### good enough
 501         return (   (unpack ("%16C*", $raw) == $self->chksum)
 502                 or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
 503 }
 504
 505 =head2 $bool = $file->has_content
 506
 507 Returns a boolean to indicate whether the current object has content.
 508 Some special files like directories and so on never will have any
 509 content. This method is mainly to make sure you don't get warnings
 510 for using uninitialized values when looking at an object's content.
 511
 512 =cut
 513
 514 sub has_content {
 515     my $self = shift;
 516     return defined $self->data() && length $self->data() ? 1 : 0;
 517 }
 518
 519 =head2 $content = $file->get_content
 520
 521 Returns the current content for the in-memory file
 522
 523 =cut
 524
 525 sub get_content {
 526     my $self = shift;
 527     $self->data( );
 528 }
 529
 530 =head2 $cref = $file->get_content_by_ref
 531
 532 Returns the current content for the in-memory file as a scalar
 533 reference. Normal users won't need this, but it will save memory if
 534 you are dealing with very large data files in your tar archive, since
 535 it will pass the contents by reference, rather than make a copy of it
 536 first.
 537
 538 =cut
 539
 540 sub get_content_by_ref {
 541     my $self = shift;
 542
 543     return \$self->{data};
 544 }
 545
 546 =head2 $bool = $file->replace_content( $content )
 547
 548 Replace the current content of the file with the new content. This
 549 only affects the in-memory archive, not the on-disk version until
 550 you write it.
 551
 552 Returns true on success, false on failure.
 553
 554 =cut
 555
 556 sub replace_content {
 557     my $self = shift;
 558     my $data = shift || '';
 559
 560     $self->data( $data );
 561     $self->size( length $data );
 562     return 1;
 563 }
 564
 565 =head2 $bool = $file->rename( $new_name )
 566
 567 Rename the current file to $new_name.
 568
 569 Note that you must specify a Unix path for $new_name, since per tar
 570 standard, all files in the archive must be Unix paths.
 571
 572 Returns true on success and false on failure.
 573
 574 =cut
 575
 576 sub rename {
 577     my $self = shift;
 578     my $path = shift;
 579
 580     return unless defined $path;
 581
 582     my ($prefix,$file) = $self->_prefix_and_file( $path );
 583
 584     $self->name( $file );
 585     $self->prefix( $prefix );
 586
 587         return 1;
 588 }
 589
 590 =head1 Convenience methods
 591
 592 To quickly check the type of a C<Archive::Tar::File> object, you can
 593 use the following methods:
 594
 595 =over 4
 596
 597 =item $file->is_file
 598
 599 Returns true if the file is of type C<file>
 600
 601 =item $file->is_dir
 602
 603 Returns true if the file is of type C<dir>
 604
 605 =item $file->is_hardlink
 606
 607 Returns true if the file is of type C<hardlink>
 608
 609 =item $file->is_symlink
 610
 611 Returns true if the file is of type C<symlink>
 612
 613 =item $file->is_chardev
 614
 615 Returns true if the file is of type C<chardev>
 616
 617 =item $file->is_blockdev
 618
 619 Returns true if the file is of type C<blockdev>
 620
 621 =item $file->is_fifo
 622
 623 Returns true if the file is of type C<fifo>
 624
 625 =item $file->is_socket
 626
 627 Returns true if the file is of type C<socket>
 628
 629 =item $file->is_longlink
 630
 631 Returns true if the file is of type C<LongLink>.
 632 Should not happen after a successful C<read>.
 633
 634 =item $file->is_label
 635
 636 Returns true if the file is of type C<Label>.
 637 Should not happen after a successful C<read>.
 638
 639 =item $file->is_unknown
 640
 641 Returns true if the file type is C<unknown>
 642
 643 =back
 644
 645 =cut
 646
 647 #stupid perl5.5.3 needs to warn if it's not numeric
 648 sub is_file     { local $^W;    FILE      == $_[0]->type }
 649 sub is_dir      { local $^W;    DIR       == $_[0]->type }
 650 sub is_hardlink { local $^W;    HARDLINK  == $_[0]->type }
 651 sub is_symlink  { local $^W;    SYMLINK   == $_[0]->type }
 652 sub is_chardev  { local $^W;    CHARDEV   == $_[0]->type }
 653 sub is_blockdev { local $^W;    BLOCKDEV  == $_[0]->type }
 654 sub is_fifo     { local $^W;    FIFO      == $_[0]->type }
 655 sub is_socket   { local $^W;    SOCKET    == $_[0]->type }
 656 sub is_unknown  { local $^W;    UNKNOWN   == $_[0]->type }
 657 sub is_longlink { local $^W;    LONGLINK  eq $_[0]->type }
 658 sub is_label    { local $^W;    LABEL     eq $_[0]->type }
 659
 660 1;