perl5.git.perl.org Git - perl5.git/blame_incremental - cpan/Archive-Tar/lib/Archive/Tar/File.pm

... / ...

Commit	Line	Data
	1	package Archive::Tar::File;
	2	use strict;
	3
	4	use Carp ();
	5	use IO::File;
	6	use File::Spec::Unix ();
	7	use File::Spec ();
	8	use File::Basename ();
	9
	10	### avoid circular use, so only require;
	11	require Archive::Tar;
	12	use Archive::Tar::Constant;
	13
	14	use vars qw[@ISA $VERSION];
	15	#@ISA = qw[Archive::Tar];
	16	$VERSION = '1.76';
	17
	### Field layout of a tar header, as a flat list of ( name => flag ) pairs
	### in the order the fields are unpacked.  A true flag marks a field whose
	### raw value is passed through oct() when a header chunk is parsed.
	my $tmpl = [
	    name     => 0,  # string
	    mode     => 1,  # octal
	    uid      => 1,  # octal
	    gid      => 1,  # octal
	    size     => 1,  # octal
	    mtime    => 1,  # octal
	    chksum   => 1,  # octal
	    type     => 0,  # character
	    linkname => 0,  # string
	    magic    => 0,  # string
	    version  => 0,  # 2 bytes
	    uname    => 0,  # string
	    gname    => 0,  # string
	    devmajor => 1,  # octal
	    devminor => 1,  # octal
	    prefix   => 0,

	    ### end UNPACK items ###
	    raw      => 0,  # the raw data chunk
	    data     => 0,  # the data associated with the file --
	                    # This might be very memory intensive
	];

	### Install one combined getter/setter per field named in $tmpl:
	### $obj->field returns the value, $obj->field( $new ) stores $new first.
	{
	    ### the field names sit at the even indexes of the flat pair list
	    my @fields = @$tmpl[ grep { $_ % 2 == 0 } 0 .. $#$tmpl ];

	    for my $field ( @fields ) {
	        no strict 'refs';
	        *{__PACKAGE__."::$field"} = sub {
	            my( $self, @value ) = @_;
	            $self->{$field} = $value[0] if @value;

	            ### warnings off for the read, just in case the key is not
	            ### there or undef or something
	            local $^W = 0;
	            return $self->{$field};
	        };
	    }
	}
	57
	58	=head1 NAME
	59
	60	Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
	61
	62	=head1 SYNOPSIS
	63
	64	my @items = $tar->get_files;
	65
	66	print $_->name, ' ', $_->size, "\n" for @items;
	67
	68	print $object->get_content;
	69	$object->replace_content('new content');
	70
	71	$object->rename( 'new/full/path/to/file.c' );
	72
	73	=head1 DESCRIPTION
	74
	75	Archive::Tar::File provides a neat little object layer for in-memory
	76	extracted files. It's mostly used internally in Archive::Tar to tidy
	77	up the code, but there's no reason users shouldn't use this API as
	78	well.
	79
	80	=head2 Accessors
	81
	82	A lot of the methods in this package are accessors to the various
	83	fields in the tar header:
	84
	85	=over 4
	86
	87	=item name
	88
	89	The file's name
	90
	91	=item mode
	92
	93	The file's mode
	94
	95	=item uid
	96
	97	The user id owning the file
	98
	99	=item gid
	100
	101	The group id owning the file
	102
	103	=item size
	104
	105	File size in bytes
	106
	107	=item mtime
	108
	109	Modification time. Adjusted to mac-time on MacOS if required
	110
	111	=item chksum
	112
	113	Checksum field for the tar header
	114
	115	=item type
	116
	117	File type -- numeric, but comparable to exported constants -- see
	118	Archive::Tar's documentation
	119
	120	=item linkname
	121
	122	If the file is a symlink, the file it's pointing to
	123
	124	=item magic
	125
	126	Tar magic string -- not useful for most users
	127
	128	=item version
	129
	130	Tar version string -- not useful for most users
	131
	132	=item uname
	133
	134	The user name that owns the file
	135
	136	=item gname
	137
	138	The group name that owns the file
	139
	140	=item devmajor
	141
	142	Device major number in case of a special file
	143
	144	=item devminor
	145
	146	Device minor number in case of a special file
	147
	148	=item prefix
	149
	150	Any directory to prefix to the extraction path, if any
	151
	152	=item raw
	153
	154	Raw tar header -- not useful for most users
	155
	156	=back
	157
	158	=head1 Methods
	159
	160	=head2 Archive::Tar::File->new( file => $path )
	161
	162	Returns a new Archive::Tar::File object from an existing file.
	163
	164	Returns undef on failure.
	165
	166	=head2 Archive::Tar::File->new( data => $path, $data, $opt )
	167
	168	Returns a new Archive::Tar::File object from data.
	169
	170	C<$path> defines the file name (which need not exist), C<$data> the
	171	file contents, and C<$opt> is a reference to a hash of attributes
	172	which may be used to override the default attributes (fields in the
	173	tar header), which are described above in the Accessors section.
	174
	175	Returns undef on failure.
	176
	177	=head2 Archive::Tar::File->new( chunk => $chunk )
	178
	179	Returns a new Archive::Tar::File object from a raw 512-byte tar
	180	archive chunk.
	181
	182	Returns undef on failure.
	183
	184	=cut
	185
	### Constructor front-end: new( $what => @args ).
	###
	### $what selects how the object is built:
	###   'chunk' -- from a raw tar header chunk
	###   'file'  -- from an existing file on disk
	###   'data'  -- from a path name plus in-memory content
	### The remaining arguments are handed to the selected builder unchanged.
	###
	### Returns the new object, or undef when $what is undefined or not one
	### of the three kinds, or when the builder fails.
	sub new {
	    my $class = shift;
	    my $what  = shift;

	    my %builder_for = (
	        chunk => '_new_from_chunk',
	        file  => '_new_from_file',
	        data  => '_new_from_data',
	    );

	    ### always hand back exactly one scalar, whatever the calling context
	    my $obj;

	    ### an undefined kind selects nothing; test definedness first so no
	    ### uninitialized-value warning is raised under -w
	    if( defined $what ) {
	        my $builder = $builder_for{$what};
	        $obj = __PACKAGE__->$builder( @_ ) if $builder;
	    }

	    return $obj;
	}
	197
	### Shallow copy: returns a new object blessed into the same class as the
	### original, holding a copy of its top-level fields.
	sub clone {
	    my( $original ) = @_;

	    my %fields = %$original;
	    return bless \%fields, ref $original;
	}
	203
	### Build an object from a raw tar header chunk.
	###
	### Arguments: the header chunk, followed by optional key => value pairs.
	### Pairs whose value is defined override what the header itself says.
	###
	### Returns the new object, or nothing when the chunk is missing/false or
	### when its magic field contains a non-word character.
	sub _new_from_chunk {
	    my $class = shift;
	    my $chunk = shift or return;    # 512 bytes of tar header
	    my %hash  = @_;

	    ### filter any arguments on defined-ness of values.
	    ### this allows overriding from what the tar-header is saying
	    ### about this tar-entry. Particularly useful for @LongLink files
	    my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;

	    ### cut every unpacked field at its first NUL byte
	    my @fields = map { /^([^\0]*)/ } unpack( UNPACK, $chunk );

	    ### $tmpl is a flat list of ( name => needs_oct ) pairs in unpack
	    ### order, so the n-th unpacked value belongs to the n-th pair
	    my %entry;
	    for my $idx ( 0 .. $#fields ) {
	        my( $name, $is_octal ) = @$tmpl[ 2 * $idx, 2 * $idx + 1 ];
	        $entry{$name} = $is_octal ? oct( $fields[$idx] ) : $fields[$idx];
	    }

	    my $obj = bless { %entry, %args }, $class;

	    ### magic is a filetype string.. it should have something like 'ustar' or
	    ### something similar... if the chunk is garbage, skip it
	    return unless $obj->magic !~ /\W/;

	    ### store the original chunk ###
	    $obj->raw( $chunk );

	    ### an empty or non-word type is treated as a plain file ...
	    $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );

	    ### ... and a plain file whose name ends in a slash is a directory
	    $obj->type(DIR)  if ( ($obj->is_file) && ($obj->name =~ m|/$|) );

	    return $obj;
	}
	236
	### Build an object describing an existing file on disk.
	###
	### Takes the path of the file.  Returns the new object, or nothing when
	### the path is undefined or when something other than a symlink cannot
	### be opened for reading.
	sub _new_from_file {
	    my( $class, $path ) = @_;

	    ### path has to at least exist
	    return unless defined $path;

	    my $type = __PACKAGE__->_filetype($path);
	    my $data = '';

	    ### directories carry no content; everything else gets slurped
	    if( $type != DIR ) {
	        my $fh = IO::File->new;

	        if( $fh->open($path) ) {
	            ### binmode needed to read files properly on win32 ###
	            binmode $fh;
	            $data = do { local $/; <$fh> };
	            close $fh;
	        }
	        elsif( $type != SYMLINK ) {
	            ### anything that's not a symlink should be resolvable, so
	            ### give up.  dangling symlinks are fine: the object is
	            ### still created, just without content
	            return;
	        }
	    }

	    ### mode, uid, gid, size and mtime are taken from lstat(); when
	    ### lstat() yields nothing, none of these keys is created
	    my %hash;
	    my @stat = (lstat $path)[2,4,5,7,9];
	    @hash{ qw[mode uid gid size mtime] } = @stat if @stat;

	    if( ON_VMS ) {
	        ### VMS has two UID modes, traditional and POSIX. Normally POSIX
	        ### is not used, and there is no easy way to see which mode is
	        ### active. In traditional mode the UID is actually the VMS UIC,
	        ### whose upper 16 bits are the GID, so it will often be larger
	        ### than what TAR can handle. So for now, assume traditional mode
	        ### if the UID is larger than 0x10000 and keep the low 16 bits.
	        $hash{uid} &= 0xFFFF if $hash{uid} > 0x10000;

	        ### The file length from stat() is the physical length of the
	        ### file, but the amount of data read in may be more for some
	        ### file types: fixed length files are read past the logical EOF
	        ### to the end of the containing block, and other file types get
	        ### expanded on read because record delimiters are added.
	        my $data_len = length $data;
	        $hash{size} = $data_len if $hash{size} < $data_len;
	    }

	    ### you must set size == 0 on symlinks, or the next entry will be
	    ### thought of as the contents of the symlink, which is wrong.
	    ### this fixes bug #7937
	    $hash{size}   = 0 if $type == DIR || $type == SYMLINK;
	    $hash{mtime} -= TIME_OFFSET;

	    ### strip the high bits off the mode, which we don't need to store
	    $hash{mode} = STRIP_MODE->( $hash{mode} );

	    ### only symlinks have a link target, and only if we can read it
	    my $linkname = ($type == SYMLINK and CAN_READLINK)
	                        ? readlink $path
	                        : '';

	    ### probably requires some file path munging here ... ###
	    ### name and prefix are set later
	    my $obj = bless {
	        %hash,
	        name        => '',
	        chksum      => CHECK_SUM,
	        type        => $type,
	        linkname    => $linkname,
	        magic       => MAGIC,
	        version     => TAR_VERSION,
	        uname       => UNAME->( $hash{uid} ),
	        gname       => GNAME->( $hash{gid} ),
	        devmajor    => 0,   # not handled
	        devminor    => 0,   # not handled
	        prefix      => '',
	        data        => $data,
	    }, $class;

	    ### fix up the prefix and file from the path
	    my( $prefix, $file ) = $obj->_prefix_and_file( $path );
	    $obj->prefix( $prefix );
	    $obj->name( $file );

	    return $obj;
	}
	334
	### Build an object from a path name and in-memory content.
	###
	### Arguments: the path (need not exist on disk), the content, and an
	### optional hash reference of header fields overriding the defaults.
	### Returns the new object, or nothing when path or content is undefined.
	sub _new_from_data {
	    my( $class, $path, $data, $opt ) = @_;

	    return unless defined $path;
	    return unless defined $data;

	    my $obj = bless {
	        data        => $data,
	        name        => '',
	        mode        => MODE,
	        uid         => UID,
	        gid         => GID,
	        size        => length $data,
	        mtime       => time - TIME_OFFSET,
	        chksum      => CHECK_SUM,
	        type        => FILE,
	        linkname    => '',
	        magic       => MAGIC,
	        version     => TAR_VERSION,
	        uname       => UNAME->( UID ),
	        gname       => GNAME->( GID ),
	        devminor    => 0,
	        devmajor    => 0,
	        prefix      => '',
	    }, $class;

	    ### overwrite with user options, if provided -- but only for fields
	    ### the object already has, so bogus options are not written
	    if( $opt and ref $opt eq 'HASH' ) {
	        my @known = grep { exists $obj->{$_} } keys %$opt;
	        @$obj{ @known } = @$opt{ @known };
	    }

	    ### fix up the prefix and file from the path
	    my( $prefix, $file ) = $obj->_prefix_and_file( $path );
	    $obj->prefix( $prefix );
	    $obj->name( $file );

	    return $obj;
	}
	380
	### Split a native path into the pair ( prefix, file ), where prefix is
	### the volume and directory part joined in Unix syntax and file is the
	### last path element.
	sub _prefix_and_file {
	    my( $self, $path ) = @_;

	    my( $vol, $dirs, $file )
	        = File::Spec->splitpath( $path, $self->is_dir );
	    my @dirs = File::Spec->splitdir( $dirs );

	    ### so sometimes the last element is '' -- probably when trailing
	    ### dir slashes are encountered... this is of course pointless,
	    ### so remove it
	    while( @dirs and not length $dirs[-1] ) {
	        pop @dirs;
	    }

	    ### if it's a directory, then $file might be empty; its name is
	    ### then the last directory element
	    if( $self->is_dir and not length $file ) {
	        $file = pop @dirs;
	    }

	    ### splitting ../ gives you the relative path in native syntax
	    if( ON_VMS ) {
	        for my $dir ( @dirs ) {
	            $dir = '..' if $dir eq '-';
	        }
	    }

	    ### leave out empty parts so they do not end up in the prefix
	    my @parts  = grep { length } $vol, @dirs;
	    my $prefix = File::Spec::Unix->catdir( @parts );

	    return( $prefix, $file );
	}
	404
	### Work out the tar entry type constant for a path on disk.
	### Returns nothing when no path is given.
	sub _filetype {
	    my( $self, $file ) = @_;

	    return unless defined $file;

	    ### the symlink test comes first and names the file; the tests after
	    ### it use the special _ filehandle instead of the file name
	    my $type = (-l $file) ? SYMLINK     # Symlink
	             : (-f _)     ? FILE        # Plain file
	             : (-d _)     ? DIR         # Directory
	             : (-p _)     ? FIFO        # Named pipe
	             : (-S _)     ? SOCKET      # Socket
	             : (-b _)     ? BLOCKDEV    # Block special
	             : (-c _)     ? CHARDEV     # Character special

	             ### shouldn't happen, this is when making archives,
	             ### not reading
	             : ($file eq LONGLINK_NAME) ? LONGLINK

	             : UNKNOWN;                 # Something else (like what?)

	    return $type;
	}
	431
	### Turn an entry into a plain file: type and mode are reset to the
	### FILE and MODE constants and the link target is cleared.  This is
	### used for symlinks when FOLLOW_SYMLINKS is true.  Always returns 1.
	sub _downgrade_to_plainfile {
	    my( $self ) = @_;

	    $self->linkname( '' );
	    $self->mode( MODE );
	    $self->type( FILE );

	    return 1;
	}
	442
	443	=head2 $bool = $file->extract( [ $alternative_name ] )
	444
	445	Extract this object, optionally to an alternative name.
	446
	447	See C<< Archive::Tar->extract_file >> for details.
	448
	449	Returns true on success and false on failure.
	450
	451	=cut
	452
	### Extract this entry by handing it, together with any further
	### arguments, to Archive::Tar->_extract_file and returning its result.
	sub extract {
	    my( $self, @args ) = @_;

	    ### one more caller frame for Carp to skip while we delegate
	    local $Carp::CarpLevel = $Carp::CarpLevel + 1;

	    return Archive::Tar->_extract_file( $self, @args );
	}
	460
	461	=head2 $path = $file->full_path
	462
	463	Returns the full path from the tar header; this is basically a
	464	concatenation of the C<prefix> and C<name> fields.
	465
	466	=cut
	467
	### Returns the entry's path: just the name when the prefix field is
	### undefined or empty, otherwise prefix and name joined Unix-style.
	sub full_path {
	    my( $self ) = @_;

	    my $prefix = $self->prefix;

	    ### no usable prefix: the name is the whole path
	    return $self->name unless defined $prefix and length $prefix;

	    ### or otherwise, catfile'd
	    return File::Spec::Unix->catfile( $prefix, $self->name );
	}
	477
	478
	479	=head2 $bool = $file->validate
	480
	481	Done by Archive::Tar internally when reading the tar file:
	482	validate the header against the checksum to ensure the integrity of the tar file.
	483
	484	Returns true on success, false on failure
	485
	486	=cut
	487
	### Check the stored raw header against the entry's checksum field.
	###
	### Returns 1 when the checksum matches, 0 otherwise -- including when no
	### raw header is stored or it is too short to contain a checksum field.
	sub validate {
	    my $self = shift;

	    my $raw = $self->raw;

	    ### the checksum field occupies bytes 148..155; without a header that
	    ### long there is nothing to validate, and substr() below would die
	    return 0 unless defined $raw and length $raw >= 156;

	    ### the sum is taken with the 8-byte checksum field itself counted
	    ### as eight spaces, so blank out all eight bytes in our copy
	    substr( $raw, 148, 8 ) = ' ' x 8;

	    my $expected = $self->chksum;

	    ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
	    ### like GNU tar does. See here for details:
	    ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
	    ### so we do both a signed AND unsigned validate. if one succeeds, that's
	    ### good enough
	    return (   unpack( "%16C*", $raw ) == $expected
	            or unpack( "%16c*", $raw ) == $expected ) ? 1 : 0;
	}
	504
	505	=head2 $bool = $file->has_content
	506
	507	Returns a boolean to indicate whether the current object has content.
	508	Some special files like directories and so on never will have any
	509	content. This method is mainly to make sure you don't get warnings
	510	for using uninitialized values when looking at an object's content.
	511
	512	=cut
	513
	### Returns 1 when the entry's data is defined and not empty, else 0.
	sub has_content {
	    my( $self ) = @_;

	    my $content = $self->data();

	    return 0 unless defined $content;
	    return length( $content ) ? 1 : 0;
	}
	518
	519	=head2 $content = $file->get_content
	520
	521	Returns the current content for the in-memory file
	522
	523	=cut
	524
	### Returns the entry's content, as given by the data accessor.
	sub get_content {
	    my( $self ) = @_;

	    return $self->data;
	}
	529
	530	=head2 $cref = $file->get_content_by_ref
	531
	532	Returns the current content for the in-memory file as a scalar
	533	reference. Normal users won't need this, but it will save memory if
	534	you are dealing with very large data files in your tar archive, since
	535	it will pass the contents by reference, rather than make a copy of it
	536	first.
	537
	538	=cut
	539
	### Returns a reference to the scalar stored in the object's data slot,
	### so the caller reads (or changes) the content itself, not a copy.
	sub get_content_by_ref {
	    my( $self ) = @_;

	    my $content_ref = \$self->{data};
	    return $content_ref;
	}
	545
	546	=head2 $bool = $file->replace_content( $content )
	547
	548	Replace the current content of the file with the new content. This
	549	only affects the in-memory archive, not the on-disk version until
	550	you write it.
	551
	552	Returns true on success, false on failure.
	553
	554	=cut
	555
	### Replace the entry's in-memory content and update its size to match.
	###
	### Takes the new content; a missing or undefined argument means empty
	### content.  Always returns 1.
	sub replace_content {
	    my $self = shift;
	    my $data = shift;

	    ### test definedness rather than truth, so that content which is
	    ### false as a string, like '0', is kept and not replaced by ''
	    $data = '' unless defined $data;

	    $self->data( $data );
	    $self->size( length $data );
	    return 1;
	}
	564
	565	=head2 $bool = $file->rename( $new_name )
	566
	567	Rename the current file to $new_name.
	568
	569	Note that you must specify a Unix path for $new_name, since per tar
	570	standard, all files in the archive must be Unix paths.
	571
	572	Returns true on success and false on failure.
	573
	574	=cut
	575
	### Give the entry a new path: the path is split into prefix and name
	### and both header fields are updated.
	### Returns 1, or nothing when no path is given.
	sub rename {
	    my( $self, $path ) = @_;

	    return unless defined $path;

	    my( $new_prefix, $new_name ) = $self->_prefix_and_file( $path );

	    $self->name( $new_name );
	    $self->prefix( $new_prefix );

	    return 1;
	}
	589
	590	=head1 Convenience methods
	591
	592	To quickly check the type of a C<Archive::Tar::File> object, you can
	593	use the following methods:
	594
	595	=over 4
	596
	597	=item $file->is_file
	598
	599	Returns true if the file is of type C<file>
	600
	601	=item $file->is_dir
	602
	603	Returns true if the file is of type C<dir>
	604
	605	=item $file->is_hardlink
	606
	607	Returns true if the file is of type C<hardlink>
	608
	609	=item $file->is_symlink
	610
	611	Returns true if the file is of type C<symlink>
	612
	613	=item $file->is_chardev
	614
	615	Returns true if the file is of type C<chardev>
	616
	617	=item $file->is_blockdev
	618
	619	Returns true if the file is of type C<blockdev>
	620
	621	=item $file->is_fifo
	622
	623	Returns true if the file is of type C<fifo>
	624
	625	=item $file->is_socket
	626
	627	Returns true if the file is of type C<socket>
	628
	629	=item $file->is_longlink
	630
	631	Returns true if the file is of type C<LongLink>.
	632	Should not happen after a successful C<read>.
	633
	634	=item $file->is_label
	635
	636	Returns true if the file is of type C<Label>.
	637	Should not happen after a successful C<read>.
	638
	639	=item $file->is_unknown
	640
	641	Returns true if the file type is C<unknown>
	642
	643	=back
	644
	645	=cut
	646
	### Type predicates: each compares the entry's type field with one of
	### the type constants.  $^W is cleared for the comparison because
	### perl5.5.3 warns when the type is not numeric.
	sub is_file {
	    my( $self ) = @_;
	    local $^W;
	    return FILE == $self->type;
	}

	sub is_dir {
	    my( $self ) = @_;
	    local $^W;
	    return DIR == $self->type;
	}

	sub is_hardlink {
	    my( $self ) = @_;
	    local $^W;
	    return HARDLINK == $self->type;
	}

	sub is_symlink {
	    my( $self ) = @_;
	    local $^W;
	    return SYMLINK == $self->type;
	}

	sub is_chardev {
	    my( $self ) = @_;
	    local $^W;
	    return CHARDEV == $self->type;
	}

	sub is_blockdev {
	    my( $self ) = @_;
	    local $^W;
	    return BLOCKDEV == $self->type;
	}

	sub is_fifo {
	    my( $self ) = @_;
	    local $^W;
	    return FIFO == $self->type;
	}

	sub is_socket {
	    my( $self ) = @_;
	    local $^W;
	    return SOCKET == $self->type;
	}

	sub is_unknown {
	    my( $self ) = @_;
	    local $^W;
	    return UNKNOWN == $self->type;
	}

	### these two are compared as strings, not numerically
	sub is_longlink {
	    my( $self ) = @_;
	    local $^W;
	    return LONGLINK eq $self->type;
	}

	sub is_label {
	    my( $self ) = @_;
	    local $^W;
	    return LABEL eq $self->type;
	}
	659
	660	1;