This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Archive-Tar to CPAN version 1.76
[perl5.git] / cpan / Archive-Tar / lib / Archive / Tar / File.pm
CommitLineData
39713df4
RGS
1package Archive::Tar::File;
2use strict;
3
642eb381 4use Carp ();
39713df4 5use IO::File;
81a5970e
RGS
6use File::Spec::Unix ();
7use File::Spec ();
8use File::Basename ();
9
642eb381
SH
10### avoid circular use, so only require;
11require Archive::Tar;
39713df4
RGS
12use Archive::Tar::Constant;
13
14use vars qw[@ISA $VERSION];
642eb381 15#@ISA = qw[Archive::Tar];
89ae55b4 16$VERSION = '1.76';
39713df4
RGS
17
18### set value to 1 to oct() it during the unpack ###
19my $tmpl = [
20 name => 0, # string
21 mode => 1, # octal
22 uid => 1, # octal
23 gid => 1, # octal
24 size => 1, # octal
25 mtime => 1, # octal
26 chksum => 1, # octal
27 type => 0, # character
28 linkname => 0, # string
29 magic => 0, # string
30 version => 0, # 2 bytes
31 uname => 0, # string
32 gname => 0, # string
33 devmajor => 1, # octal
34 devminor => 1, # octal
35 prefix => 0,
36
37### end UNPACK items ###
38 raw => 0, # the raw data chunk
39 data => 0, # the data associated with the file --
40 # This might be very memory intensive
41];
42
43### install get/set accessors for this object.
44for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
45 my $key = $tmpl->[$i];
46 no strict 'refs';
47 *{__PACKAGE__."::$key"} = sub {
48 my $self = shift;
49 $self->{$key} = $_[0] if @_;
50
51 ### just in case the key is not there or undef or something ###
52 { local $^W = 0;
53 return $self->{$key};
54 }
55 }
56}
57
58=head1 NAME
59
60Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
61
62=head1 SYNOPSIS
63
64 my @items = $tar->get_files;
65
66 print $_->name, ' ', $_->size, "\n" for @items;
67
68 print $object->get_content;
69 $object->replace_content('new content');
70
71 $object->rename( 'new/full/path/to/file.c' );
72
73=head1 DESCRIPTION
74
75Archive::Tar::Files provides a neat little object layer for in-memory
76extracted files. It's mostly used internally in Archive::Tar to tidy
77up the code, but there's no reason users shouldn't use this API as
78well.
79
80=head2 Accessors
81
82A lot of the methods in this package are accessors to the various
83fields in the tar header:
84
85=over 4
86
87=item name
88
89The file's name
90
91=item mode
92
93The file's mode
94
95=item uid
96
97The user id owning the file
98
99=item gid
100
101The group id owning the file
102
103=item size
104
105File size in bytes
106
107=item mtime
108
109Modification time. Adjusted to mac-time on MacOS if required
110
111=item chksum
112
113Checksum field for the tar header
114
115=item type
116
117File type -- numeric, but comparable to exported constants -- see
118Archive::Tar's documentation
119
120=item linkname
121
122If the file is a symlink, the file it's pointing to
123
124=item magic
125
126Tar magic string -- not useful for most users
127
128=item version
129
130Tar version string -- not useful for most users
131
132=item uname
133
134The user name that owns the file
135
136=item gname
137
138The group name that owns the file
139
140=item devmajor
141
142Device major number in case of a special file
143
144=item devminor
145
146Device minor number in case of a special file
147
148=item prefix
149
150Any directory to prefix to the extraction path, if any
151
152=item raw
153
154Raw tar header -- not useful for most users
155
156=back
157
158=head1 Methods
159
642eb381 160=head2 Archive::Tar::File->new( file => $path )
39713df4
RGS
161
162Returns a new Archive::Tar::File object from an existing file.
163
164Returns undef on failure.
165
642eb381 166=head2 Archive::Tar::File->new( data => $path, $data, $opt )
39713df4
RGS
167
168Returns a new Archive::Tar::File object from data.
169
170C<$path> defines the file name (which need not exist), C<$data> the
171file contents, and C<$opt> is a reference to a hash of attributes
172which may be used to override the default attributes (fields in the
173tar header), which are described above in the Accessors section.
174
175Returns undef on failure.
176
642eb381 177=head2 Archive::Tar::File->new( chunk => $chunk )
39713df4
RGS
178
179Returns a new Archive::Tar::File object from a raw 512-byte tar
180archive chunk.
181
182Returns undef on failure.
183
184=cut
185
186sub new {
187 my $class = shift;
188 my $what = shift;
189
190 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
191 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
192 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
193 undef;
194
195 return $obj;
196}
197
198### copies the data, creates a clone ###
199sub clone {
200 my $self = shift;
201 return bless { %$self }, ref $self;
202}
203
204sub _new_from_chunk {
205 my $class = shift;
01d11a1c 206 my $chunk = shift or return; # 512 bytes of tar header
81a5970e
RGS
207 my %hash = @_;
208
209 ### filter any arguments on defined-ness of values.
210 ### this allows overriding from what the tar-header is saying
211 ### about this tar-entry. Particularly useful for @LongLink files
212 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
39713df4
RGS
213
214 ### makes it start at 0 actually... :) ###
215 my $i = -1;
216 my %entry = map {
217 $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_
218 } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );
219
81a5970e 220 my $obj = bless { %entry, %args }, $class;
39713df4
RGS
221
222 ### magic is a filetype string.. it should have something like 'ustar' or
223 ### something similar... if the chunk is garbage, skip it
224 return unless $obj->magic !~ /\W/;
225
226 ### store the original chunk ###
227 $obj->raw( $chunk );
228
229 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
230 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
231
232
233 return $obj;
234
235}
236
237sub _new_from_file {
238 my $class = shift;
01d11a1c
SP
239 my $path = shift;
240
241 ### path has to at least exist
242 return unless defined $path;
243
39713df4
RGS
244 my $type = __PACKAGE__->_filetype($path);
245 my $data = '';
246
97a504ba
RGS
247 READ: {
248 unless ($type == DIR ) {
249 my $fh = IO::File->new;
250
251 unless( $fh->open($path) ) {
252 ### dangling symlinks are fine, stop reading but continue
253 ### creating the object
254 last READ if $type == SYMLINK;
255
256 ### otherwise, return from this function --
257 ### anything that's *not* a symlink should be
258 ### resolvable
259 return;
260 }
261
262 ### binmode needed to read files properly on win32 ###
263 binmode $fh;
264 $data = do { local $/; <$fh> };
265 close $fh;
266 }
39713df4
RGS
267 }
268
269 my @items = qw[mode uid gid size mtime];
270 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
271
642eb381
SH
272 if (ON_VMS) {
273 ### VMS has two UID modes, traditional and POSIX. Normally POSIX is
274 ### not used. We currently do not have an easy way to see if we are in
275 ### POSIX mode. In traditional mode, the UID is actually the VMS UIC.
276 ### The VMS UIC has the upper 16 bits is the GID, which in many cases
277 ### the VMS UIC will be larger than 209715, the largest that TAR can
278 ### handle. So for now, assume it is traditional if the UID is larger
279 ### than 0x10000.
280
281 if ($hash{uid} > 0x10000) {
282 $hash{uid} = $hash{uid} & 0xFFFF;
283 }
284
285 ### The file length from stat() is the physical length of the file
286 ### However the amount of data read in may be more for some file types.
287 ### Fixed length files are read past the logical EOF to end of the block
288 ### containing. Other file types get expanded on read because record
289 ### delimiters are added.
290
291 my $data_len = length $data;
292 $hash{size} = $data_len if $hash{size} < $data_len;
293
294 }
39713df4
RGS
295 ### you *must* set size == 0 on symlinks, or the next entry will be
296 ### though of as the contents of the symlink, which is wrong.
297 ### this fixes bug #7937
298 $hash{size} = 0 if ($type == DIR or $type == SYMLINK);
299 $hash{mtime} -= TIME_OFFSET;
300
301 ### strip the high bits off the mode, which we don't need to store
302 $hash{mode} = STRIP_MODE->( $hash{mode} );
303
304
305 ### probably requires some file path munging here ... ###
306 ### name and prefix are set later
307 my $obj = {
308 %hash,
309 name => '',
310 chksum => CHECK_SUM,
311 type => $type,
312 linkname => ($type == SYMLINK and CAN_READLINK)
313 ? readlink $path
314 : '',
315 magic => MAGIC,
316 version => TAR_VERSION,
317 uname => UNAME->( $hash{uid} ),
318 gname => GNAME->( $hash{gid} ),
319 devmajor => 0, # not handled
320 devminor => 0, # not handled
321 prefix => '',
322 data => $data,
323 };
324
325 bless $obj, $class;
326
327 ### fix up the prefix and file from the path
328 my($prefix,$file) = $obj->_prefix_and_file( $path );
329 $obj->prefix( $prefix );
330 $obj->name( $file );
331
332 return $obj;
333}
334
335sub _new_from_data {
336 my $class = shift;
01d11a1c 337 my $path = shift; return unless defined $path;
39713df4
RGS
338 my $data = shift; return unless defined $data;
339 my $opt = shift;
340
341 my $obj = {
342 data => $data,
343 name => '',
344 mode => MODE,
345 uid => UID,
346 gid => GID,
347 size => length $data,
348 mtime => time - TIME_OFFSET,
349 chksum => CHECK_SUM,
350 type => FILE,
351 linkname => '',
352 magic => MAGIC,
353 version => TAR_VERSION,
354 uname => UNAME->( UID ),
355 gname => GNAME->( GID ),
356 devminor => 0,
357 devmajor => 0,
358 prefix => '',
359 };
360
361 ### overwrite with user options, if provided ###
362 if( $opt and ref $opt eq 'HASH' ) {
363 for my $key ( keys %$opt ) {
364
365 ### don't write bogus options ###
366 next unless exists $obj->{$key};
367 $obj->{$key} = $opt->{$key};
368 }
369 }
370
371 bless $obj, $class;
372
373 ### fix up the prefix and file from the path
374 my($prefix,$file) = $obj->_prefix_and_file( $path );
375 $obj->prefix( $prefix );
376 $obj->name( $file );
377
378 return $obj;
379}
380
381sub _prefix_and_file {
382 my $self = shift;
383 my $path = shift;
384
385 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
386 my @dirs = File::Spec->splitdir( $dirs );
387
388 ### so sometimes the last element is '' -- probably when trailing
99fc177f 389 ### dir slashes are encountered... this is of course pointless,
39713df4
RGS
390 ### so remove it
391 pop @dirs while @dirs and not length $dirs[-1];
392
393 ### if it's a directory, then $file might be empty
394 $file = pop @dirs if $self->is_dir and not length $file;
395
f5695358
JB
396 ### splitting ../ gives you the relative path in native syntax
397 map { $_ = '..' if $_ eq '-' } @dirs if ON_VMS;
398
39713df4
RGS
399 my $prefix = File::Spec::Unix->catdir(
400 grep { length } $vol, @dirs
401 );
402 return( $prefix, $file );
403}
404
405sub _filetype {
406 my $self = shift;
01d11a1c
SP
407 my $file = shift;
408
409 return unless defined $file;
39713df4
RGS
410
411 return SYMLINK if (-l $file); # Symlink
412
413 return FILE if (-f _); # Plain file
414
415 return DIR if (-d _); # Directory
416
417 return FIFO if (-p _); # Named pipe
418
419 return SOCKET if (-S _); # Socket
420
421 return BLOCKDEV if (-b _); # Block special
422
423 return CHARDEV if (-c _); # Character special
424
425 ### shouldn't happen, this is when making archives, not reading ###
426 return LONGLINK if ( $file eq LONGLINK_NAME );
427
428 return UNKNOWN; # Something else (like what?)
429
430}
431
432### this method 'downgrades' a file to plain file -- this is used for
433### symlinks when FOLLOW_SYMLINKS is true.
434sub _downgrade_to_plainfile {
435 my $entry = shift;
436 $entry->type( FILE );
437 $entry->mode( MODE );
438 $entry->linkname('');
439
440 return 1;
441}
442
642eb381
SH
443=head2 $bool = $file->extract( [ $alternative_name ] )
444
445Extract this object, optionally to an alternative name.
446
447See C<< Archive::Tar->extract_file >> for details.
448
449Returns true on success and false on failure.
450
451=cut
452
453sub extract {
454 my $self = shift;
455
456 local $Carp::CarpLevel += 1;
457
458 return Archive::Tar->_extract_file( $self, @_ );
459}
460
461=head2 $path = $file->full_path
39713df4
RGS
462
463Returns the full path from the tar header; this is basically a
464concatenation of the C<prefix> and C<name> fields.
465
466=cut
467
468sub full_path {
469 my $self = shift;
470
471 ### if prefix field is emtpy
472 return $self->name unless defined $self->prefix and length $self->prefix;
473
474 ### or otherwise, catfile'd
475 return File::Spec::Unix->catfile( $self->prefix, $self->name );
476}
477
478
642eb381 479=head2 $bool = $file->validate
39713df4
RGS
480
481Done by Archive::Tar internally when reading the tar file:
482validate the header against the checksum to ensure integer tar file.
483
484Returns true on success, false on failure
485
486=cut
487
488sub validate {
489 my $self = shift;
490
491 my $raw = $self->raw;
492
493 ### don't know why this one is different from the one we /write/ ###
494 substr ($raw, 148, 8) = " ";
bef46b70
JB
495
496 ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
497 ### like GNU tar does. See here for details:
498 ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
499 ### so we do both a signed AND unsigned validate. if one succeeds, that's
500 ### good enough
501 return ( (unpack ("%16C*", $raw) == $self->chksum)
502 or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
39713df4
RGS
503}
504
642eb381 505=head2 $bool = $file->has_content
39713df4
RGS
506
507Returns a boolean to indicate whether the current object has content.
508Some special files like directories and so on never will have any
509content. This method is mainly to make sure you don't get warnings
510for using uninitialized values when looking at an object's content.
511
512=cut
513
514sub has_content {
515 my $self = shift;
516 return defined $self->data() && length $self->data() ? 1 : 0;
517}
518
642eb381 519=head2 $content = $file->get_content
39713df4
RGS
520
521Returns the current content for the in-memory file
522
523=cut
524
525sub get_content {
526 my $self = shift;
527 $self->data( );
528}
529
642eb381 530=head2 $cref = $file->get_content_by_ref
39713df4
RGS
531
532Returns the current content for the in-memory file as a scalar
533reference. Normal users won't need this, but it will save memory if
534you are dealing with very large data files in your tar archive, since
535it will pass the contents by reference, rather than make a copy of it
536first.
537
538=cut
539
540sub get_content_by_ref {
541 my $self = shift;
542
543 return \$self->{data};
544}
545
642eb381 546=head2 $bool = $file->replace_content( $content )
39713df4
RGS
547
548Replace the current content of the file with the new content. This
549only affects the in-memory archive, not the on-disk version until
550you write it.
551
552Returns true on success, false on failure.
553
554=cut
555
556sub replace_content {
557 my $self = shift;
558 my $data = shift || '';
559
560 $self->data( $data );
561 $self->size( length $data );
562 return 1;
563}
564
642eb381 565=head2 $bool = $file->rename( $new_name )
39713df4
RGS
566
567Rename the current file to $new_name.
568
569Note that you must specify a Unix path for $new_name, since per tar
570standard, all files in the archive must be Unix paths.
571
572Returns true on success and false on failure.
573
574=cut
575
576sub rename {
577 my $self = shift;
01d11a1c
SP
578 my $path = shift;
579
580 return unless defined $path;
39713df4
RGS
581
582 my ($prefix,$file) = $self->_prefix_and_file( $path );
583
584 $self->name( $file );
585 $self->prefix( $prefix );
586
587 return 1;
588}
589
590=head1 Convenience methods
591
592To quickly check the type of a C<Archive::Tar::File> object, you can
593use the following methods:
594
595=over 4
596
642eb381 597=item $file->is_file
39713df4
RGS
598
599Returns true if the file is of type C<file>
600
642eb381 601=item $file->is_dir
39713df4
RGS
602
603Returns true if the file is of type C<dir>
604
642eb381 605=item $file->is_hardlink
39713df4
RGS
606
607Returns true if the file is of type C<hardlink>
608
642eb381 609=item $file->is_symlink
39713df4
RGS
610
611Returns true if the file is of type C<symlink>
612
642eb381 613=item $file->is_chardev
39713df4
RGS
614
615Returns true if the file is of type C<chardev>
616
642eb381 617=item $file->is_blockdev
39713df4
RGS
618
619Returns true if the file is of type C<blockdev>
620
642eb381 621=item $file->is_fifo
39713df4
RGS
622
623Returns true if the file is of type C<fifo>
624
642eb381 625=item $file->is_socket
39713df4
RGS
626
627Returns true if the file is of type C<socket>
628
642eb381 629=item $file->is_longlink
39713df4
RGS
630
631Returns true if the file is of type C<LongLink>.
632Should not happen after a successful C<read>.
633
642eb381 634=item $file->is_label
39713df4
RGS
635
636Returns true if the file is of type C<Label>.
637Should not happen after a successful C<read>.
638
642eb381 639=item $file->is_unknown
39713df4
RGS
640
641Returns true if the file type is C<unknown>
642
643=back
644
645=cut
646
647#stupid perl5.5.3 needs to warn if it's not numeric
648sub is_file { local $^W; FILE == $_[0]->type }
649sub is_dir { local $^W; DIR == $_[0]->type }
650sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
651sub is_symlink { local $^W; SYMLINK == $_[0]->type }
652sub is_chardev { local $^W; CHARDEV == $_[0]->type }
653sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
654sub is_fifo { local $^W; FIFO == $_[0]->type }
655sub is_socket { local $^W; SOCKET == $_[0]->type }
656sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
657sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
658sub is_label { local $^W; LABEL eq $_[0]->type }
659
6601;