This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Archive-Tar to CPAN version 1.76
[perl5.git] / cpan / Archive-Tar / lib / Archive / Tar / File.pm
... / ...
CommitLineData
1package Archive::Tar::File;
2use strict;
3
4use Carp ();
5use IO::File;
6use File::Spec::Unix ();
7use File::Spec ();
8use File::Basename ();
9
10### avoid circular use, so only require;
11require Archive::Tar;
12use Archive::Tar::Constant;
13
14use vars qw[@ISA $VERSION];
15#@ISA = qw[Archive::Tar];
16$VERSION = '1.76';
17
18### set value to 1 to oct() it during the unpack ###
19my $tmpl = [
20 name => 0, # string
21 mode => 1, # octal
22 uid => 1, # octal
23 gid => 1, # octal
24 size => 1, # octal
25 mtime => 1, # octal
26 chksum => 1, # octal
27 type => 0, # character
28 linkname => 0, # string
29 magic => 0, # string
30 version => 0, # 2 bytes
31 uname => 0, # string
32 gname => 0, # string
33 devmajor => 1, # octal
34 devminor => 1, # octal
35 prefix => 0,
36
37### end UNPACK items ###
38 raw => 0, # the raw data chunk
39 data => 0, # the data associated with the file --
40 # This might be very memory intensive
41];
42
43### install get/set accessors for this object.
44for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
45 my $key = $tmpl->[$i];
46 no strict 'refs';
47 *{__PACKAGE__."::$key"} = sub {
48 my $self = shift;
49 $self->{$key} = $_[0] if @_;
50
51 ### just in case the key is not there or undef or something ###
52 { local $^W = 0;
53 return $self->{$key};
54 }
55 }
56}
57
58=head1 NAME
59
60Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
61
62=head1 SYNOPSIS
63
64 my @items = $tar->get_files;
65
66 print $_->name, ' ', $_->size, "\n" for @items;
67
68 print $object->get_content;
69 $object->replace_content('new content');
70
71 $object->rename( 'new/full/path/to/file.c' );
72
73=head1 DESCRIPTION
74
75Archive::Tar::Files provides a neat little object layer for in-memory
76extracted files. It's mostly used internally in Archive::Tar to tidy
77up the code, but there's no reason users shouldn't use this API as
78well.
79
80=head2 Accessors
81
82A lot of the methods in this package are accessors to the various
83fields in the tar header:
84
85=over 4
86
87=item name
88
89The file's name
90
91=item mode
92
93The file's mode
94
95=item uid
96
97The user id owning the file
98
99=item gid
100
101The group id owning the file
102
103=item size
104
105File size in bytes
106
107=item mtime
108
109Modification time. Adjusted to mac-time on MacOS if required
110
111=item chksum
112
113Checksum field for the tar header
114
115=item type
116
117File type -- numeric, but comparable to exported constants -- see
118Archive::Tar's documentation
119
120=item linkname
121
122If the file is a symlink, the file it's pointing to
123
124=item magic
125
126Tar magic string -- not useful for most users
127
128=item version
129
130Tar version string -- not useful for most users
131
132=item uname
133
134The user name that owns the file
135
136=item gname
137
138The group name that owns the file
139
140=item devmajor
141
142Device major number in case of a special file
143
144=item devminor
145
146Device minor number in case of a special file
147
148=item prefix
149
150Any directory to prefix to the extraction path, if any
151
152=item raw
153
154Raw tar header -- not useful for most users
155
156=back
157
158=head1 Methods
159
160=head2 Archive::Tar::File->new( file => $path )
161
162Returns a new Archive::Tar::File object from an existing file.
163
164Returns undef on failure.
165
166=head2 Archive::Tar::File->new( data => $path, $data, $opt )
167
168Returns a new Archive::Tar::File object from data.
169
170C<$path> defines the file name (which need not exist), C<$data> the
171file contents, and C<$opt> is a reference to a hash of attributes
172which may be used to override the default attributes (fields in the
173tar header), which are described above in the Accessors section.
174
175Returns undef on failure.
176
177=head2 Archive::Tar::File->new( chunk => $chunk )
178
179Returns a new Archive::Tar::File object from a raw 512-byte tar
180archive chunk.
181
182Returns undef on failure.
183
184=cut
185
186sub new {
187 my $class = shift;
188 my $what = shift;
189
190 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
191 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
192 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
193 undef;
194
195 return $obj;
196}
197
198### copies the data, creates a clone ###
199sub clone {
200 my $self = shift;
201 return bless { %$self }, ref $self;
202}
203
204sub _new_from_chunk {
205 my $class = shift;
206 my $chunk = shift or return; # 512 bytes of tar header
207 my %hash = @_;
208
209 ### filter any arguments on defined-ness of values.
210 ### this allows overriding from what the tar-header is saying
211 ### about this tar-entry. Particularly useful for @LongLink files
212 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
213
214 ### makes it start at 0 actually... :) ###
215 my $i = -1;
216 my %entry = map {
217 $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_
218 } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );
219
220 my $obj = bless { %entry, %args }, $class;
221
222 ### magic is a filetype string.. it should have something like 'ustar' or
223 ### something similar... if the chunk is garbage, skip it
224 return unless $obj->magic !~ /\W/;
225
226 ### store the original chunk ###
227 $obj->raw( $chunk );
228
229 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
230 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
231
232
233 return $obj;
234
235}
236
237sub _new_from_file {
238 my $class = shift;
239 my $path = shift;
240
241 ### path has to at least exist
242 return unless defined $path;
243
244 my $type = __PACKAGE__->_filetype($path);
245 my $data = '';
246
247 READ: {
248 unless ($type == DIR ) {
249 my $fh = IO::File->new;
250
251 unless( $fh->open($path) ) {
252 ### dangling symlinks are fine, stop reading but continue
253 ### creating the object
254 last READ if $type == SYMLINK;
255
256 ### otherwise, return from this function --
257 ### anything that's *not* a symlink should be
258 ### resolvable
259 return;
260 }
261
262 ### binmode needed to read files properly on win32 ###
263 binmode $fh;
264 $data = do { local $/; <$fh> };
265 close $fh;
266 }
267 }
268
269 my @items = qw[mode uid gid size mtime];
270 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
271
272 if (ON_VMS) {
273 ### VMS has two UID modes, traditional and POSIX. Normally POSIX is
274 ### not used. We currently do not have an easy way to see if we are in
275 ### POSIX mode. In traditional mode, the UID is actually the VMS UIC.
276 ### The VMS UIC has the upper 16 bits is the GID, which in many cases
277 ### the VMS UIC will be larger than 209715, the largest that TAR can
278 ### handle. So for now, assume it is traditional if the UID is larger
279 ### than 0x10000.
280
281 if ($hash{uid} > 0x10000) {
282 $hash{uid} = $hash{uid} & 0xFFFF;
283 }
284
285 ### The file length from stat() is the physical length of the file
286 ### However the amount of data read in may be more for some file types.
287 ### Fixed length files are read past the logical EOF to end of the block
288 ### containing. Other file types get expanded on read because record
289 ### delimiters are added.
290
291 my $data_len = length $data;
292 $hash{size} = $data_len if $hash{size} < $data_len;
293
294 }
295 ### you *must* set size == 0 on symlinks, or the next entry will be
296 ### though of as the contents of the symlink, which is wrong.
297 ### this fixes bug #7937
298 $hash{size} = 0 if ($type == DIR or $type == SYMLINK);
299 $hash{mtime} -= TIME_OFFSET;
300
301 ### strip the high bits off the mode, which we don't need to store
302 $hash{mode} = STRIP_MODE->( $hash{mode} );
303
304
305 ### probably requires some file path munging here ... ###
306 ### name and prefix are set later
307 my $obj = {
308 %hash,
309 name => '',
310 chksum => CHECK_SUM,
311 type => $type,
312 linkname => ($type == SYMLINK and CAN_READLINK)
313 ? readlink $path
314 : '',
315 magic => MAGIC,
316 version => TAR_VERSION,
317 uname => UNAME->( $hash{uid} ),
318 gname => GNAME->( $hash{gid} ),
319 devmajor => 0, # not handled
320 devminor => 0, # not handled
321 prefix => '',
322 data => $data,
323 };
324
325 bless $obj, $class;
326
327 ### fix up the prefix and file from the path
328 my($prefix,$file) = $obj->_prefix_and_file( $path );
329 $obj->prefix( $prefix );
330 $obj->name( $file );
331
332 return $obj;
333}
334
335sub _new_from_data {
336 my $class = shift;
337 my $path = shift; return unless defined $path;
338 my $data = shift; return unless defined $data;
339 my $opt = shift;
340
341 my $obj = {
342 data => $data,
343 name => '',
344 mode => MODE,
345 uid => UID,
346 gid => GID,
347 size => length $data,
348 mtime => time - TIME_OFFSET,
349 chksum => CHECK_SUM,
350 type => FILE,
351 linkname => '',
352 magic => MAGIC,
353 version => TAR_VERSION,
354 uname => UNAME->( UID ),
355 gname => GNAME->( GID ),
356 devminor => 0,
357 devmajor => 0,
358 prefix => '',
359 };
360
361 ### overwrite with user options, if provided ###
362 if( $opt and ref $opt eq 'HASH' ) {
363 for my $key ( keys %$opt ) {
364
365 ### don't write bogus options ###
366 next unless exists $obj->{$key};
367 $obj->{$key} = $opt->{$key};
368 }
369 }
370
371 bless $obj, $class;
372
373 ### fix up the prefix and file from the path
374 my($prefix,$file) = $obj->_prefix_and_file( $path );
375 $obj->prefix( $prefix );
376 $obj->name( $file );
377
378 return $obj;
379}
380
381sub _prefix_and_file {
382 my $self = shift;
383 my $path = shift;
384
385 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
386 my @dirs = File::Spec->splitdir( $dirs );
387
388 ### so sometimes the last element is '' -- probably when trailing
389 ### dir slashes are encountered... this is of course pointless,
390 ### so remove it
391 pop @dirs while @dirs and not length $dirs[-1];
392
393 ### if it's a directory, then $file might be empty
394 $file = pop @dirs if $self->is_dir and not length $file;
395
396 ### splitting ../ gives you the relative path in native syntax
397 map { $_ = '..' if $_ eq '-' } @dirs if ON_VMS;
398
399 my $prefix = File::Spec::Unix->catdir(
400 grep { length } $vol, @dirs
401 );
402 return( $prefix, $file );
403}
404
405sub _filetype {
406 my $self = shift;
407 my $file = shift;
408
409 return unless defined $file;
410
411 return SYMLINK if (-l $file); # Symlink
412
413 return FILE if (-f _); # Plain file
414
415 return DIR if (-d _); # Directory
416
417 return FIFO if (-p _); # Named pipe
418
419 return SOCKET if (-S _); # Socket
420
421 return BLOCKDEV if (-b _); # Block special
422
423 return CHARDEV if (-c _); # Character special
424
425 ### shouldn't happen, this is when making archives, not reading ###
426 return LONGLINK if ( $file eq LONGLINK_NAME );
427
428 return UNKNOWN; # Something else (like what?)
429
430}
431
432### this method 'downgrades' a file to plain file -- this is used for
433### symlinks when FOLLOW_SYMLINKS is true.
434sub _downgrade_to_plainfile {
435 my $entry = shift;
436 $entry->type( FILE );
437 $entry->mode( MODE );
438 $entry->linkname('');
439
440 return 1;
441}
442
443=head2 $bool = $file->extract( [ $alternative_name ] )
444
445Extract this object, optionally to an alternative name.
446
447See C<< Archive::Tar->extract_file >> for details.
448
449Returns true on success and false on failure.
450
451=cut
452
453sub extract {
454 my $self = shift;
455
456 local $Carp::CarpLevel += 1;
457
458 return Archive::Tar->_extract_file( $self, @_ );
459}
460
461=head2 $path = $file->full_path
462
463Returns the full path from the tar header; this is basically a
464concatenation of the C<prefix> and C<name> fields.
465
466=cut
467
468sub full_path {
469 my $self = shift;
470
471 ### if prefix field is emtpy
472 return $self->name unless defined $self->prefix and length $self->prefix;
473
474 ### or otherwise, catfile'd
475 return File::Spec::Unix->catfile( $self->prefix, $self->name );
476}
477
478
479=head2 $bool = $file->validate
480
481Done by Archive::Tar internally when reading the tar file:
482validate the header against the checksum to ensure integer tar file.
483
484Returns true on success, false on failure
485
486=cut
487
488sub validate {
489 my $self = shift;
490
491 my $raw = $self->raw;
492
493 ### don't know why this one is different from the one we /write/ ###
494 substr ($raw, 148, 8) = " ";
495
496 ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
497 ### like GNU tar does. See here for details:
498 ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
499 ### so we do both a signed AND unsigned validate. if one succeeds, that's
500 ### good enough
501 return ( (unpack ("%16C*", $raw) == $self->chksum)
502 or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
503}
504
505=head2 $bool = $file->has_content
506
507Returns a boolean to indicate whether the current object has content.
508Some special files like directories and so on never will have any
509content. This method is mainly to make sure you don't get warnings
510for using uninitialized values when looking at an object's content.
511
512=cut
513
514sub has_content {
515 my $self = shift;
516 return defined $self->data() && length $self->data() ? 1 : 0;
517}
518
519=head2 $content = $file->get_content
520
521Returns the current content for the in-memory file
522
523=cut
524
525sub get_content {
526 my $self = shift;
527 $self->data( );
528}
529
530=head2 $cref = $file->get_content_by_ref
531
532Returns the current content for the in-memory file as a scalar
533reference. Normal users won't need this, but it will save memory if
534you are dealing with very large data files in your tar archive, since
535it will pass the contents by reference, rather than make a copy of it
536first.
537
538=cut
539
540sub get_content_by_ref {
541 my $self = shift;
542
543 return \$self->{data};
544}
545
546=head2 $bool = $file->replace_content( $content )
547
548Replace the current content of the file with the new content. This
549only affects the in-memory archive, not the on-disk version until
550you write it.
551
552Returns true on success, false on failure.
553
554=cut
555
556sub replace_content {
557 my $self = shift;
558 my $data = shift || '';
559
560 $self->data( $data );
561 $self->size( length $data );
562 return 1;
563}
564
565=head2 $bool = $file->rename( $new_name )
566
567Rename the current file to $new_name.
568
569Note that you must specify a Unix path for $new_name, since per tar
570standard, all files in the archive must be Unix paths.
571
572Returns true on success and false on failure.
573
574=cut
575
576sub rename {
577 my $self = shift;
578 my $path = shift;
579
580 return unless defined $path;
581
582 my ($prefix,$file) = $self->_prefix_and_file( $path );
583
584 $self->name( $file );
585 $self->prefix( $prefix );
586
587 return 1;
588}
589
590=head1 Convenience methods
591
592To quickly check the type of a C<Archive::Tar::File> object, you can
593use the following methods:
594
595=over 4
596
597=item $file->is_file
598
599Returns true if the file is of type C<file>
600
601=item $file->is_dir
602
603Returns true if the file is of type C<dir>
604
605=item $file->is_hardlink
606
607Returns true if the file is of type C<hardlink>
608
609=item $file->is_symlink
610
611Returns true if the file is of type C<symlink>
612
613=item $file->is_chardev
614
615Returns true if the file is of type C<chardev>
616
617=item $file->is_blockdev
618
619Returns true if the file is of type C<blockdev>
620
621=item $file->is_fifo
622
623Returns true if the file is of type C<fifo>
624
625=item $file->is_socket
626
627Returns true if the file is of type C<socket>
628
629=item $file->is_longlink
630
631Returns true if the file is of type C<LongLink>.
632Should not happen after a successful C<read>.
633
634=item $file->is_label
635
636Returns true if the file is of type C<Label>.
637Should not happen after a successful C<read>.
638
639=item $file->is_unknown
640
641Returns true if the file type is C<unknown>
642
643=back
644
645=cut
646
647#stupid perl5.5.3 needs to warn if it's not numeric
648sub is_file { local $^W; FILE == $_[0]->type }
649sub is_dir { local $^W; DIR == $_[0]->type }
650sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
651sub is_symlink { local $^W; SYMLINK == $_[0]->type }
652sub is_chardev { local $^W; CHARDEV == $_[0]->type }
653sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
654sub is_fifo { local $^W; FIFO == $_[0]->type }
655sub is_socket { local $^W; SOCKET == $_[0]->type }
656sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
657sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
658sub is_label { local $^W; LABEL eq $_[0]->type }
659
6601;