Commit | Line | Data |
---|---|---|
39713df4 | 1 | ### the gnu tar specification: |
f38c1908 | 2 | ### http://www.gnu.org/software/tar/manual/tar.html |
39713df4 RGS |
3 | ### |
4 | ### and the pax format spec, which tar derives from: | |
5 | ### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html | |
6 | ||
7 | package Archive::Tar; | |
8 | require 5.005_03; | |
9 | ||
642eb381 SH |
10 | use Cwd; |
11 | use IO::Zlib; | |
12 | use IO::File; | |
13 | use Carp qw(carp croak); | |
14 | use File::Spec (); | |
15 | use File::Spec::Unix (); | |
16 | use File::Path (); | |
17 | ||
18 | use Archive::Tar::File; | |
19 | use Archive::Tar::Constant; | |
20 | ||
21 | require Exporter; | |
22 | ||
39713df4 RGS |
23 | use strict; |
24 | use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD | |
1c82faa7 | 25 | $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS |
d33cd7cf | 26 | $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT |
178aef9a RGS |
27 | ]; |
28 | ||
642eb381 | 29 | @ISA = qw[Exporter]; |
bef46b70 | 30 | @EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP ]; |
178aef9a RGS |
31 | $DEBUG = 0; |
32 | $WARN = 1; | |
33 | $FOLLOW_SYMLINK = 0; | |
f8c9502f | 34 | $VERSION = "1.82"; |
178aef9a RGS |
35 | $CHOWN = 1; |
36 | $CHMOD = 1; | |
1c82faa7 | 37 | $SAME_PERMISSIONS = $> == 0 ? 1 : 0; |
178aef9a RGS |
38 | $DO_NOT_USE_PREFIX = 0; |
39 | $INSECURE_EXTRACT_MODE = 0; | |
d33cd7cf | 40 | $ZERO_PAD_NUMBERS = 0; |
39713df4 RGS |
41 | |
42 | BEGIN { | |
43 | use Config; | |
44 | $HAS_PERLIO = $Config::Config{useperlio}; | |
45 | ||
46 | ### try and load IO::String anyway, so you can dynamically | |
47 | ### switch between perlio and IO::String | |
642eb381 | 48 | $HAS_IO_STRING = eval { |
39713df4 RGS |
49 | require IO::String; |
50 | import IO::String; | |
642eb381 SH |
51 | 1; |
52 | } || 0; | |
39713df4 RGS |
53 | } |
54 | ||
39713df4 RGS |
55 | =head1 NAME |
56 | ||
57 | Archive::Tar - module for manipulations of tar archives | |
58 | ||
59 | =head1 SYNOPSIS | |
60 | ||
61 | use Archive::Tar; | |
62 | my $tar = Archive::Tar->new; | |
63 | ||
642eb381 | 64 | $tar->read('origin.tgz'); |
39713df4 RGS |
65 | $tar->extract(); |
66 | ||
67 | $tar->add_files('file/foo.pl', 'docs/README'); | |
68 | $tar->add_data('file/baz.txt', 'This is the contents now'); | |
69 | ||
70 | $tar->rename('oldname', 'new/file/name'); | |
93e94d8a CBW |
71 | $tar->chown('/', 'root'); |
72 | $tar->chown('/', 'root:root'); | |
1e915955 | 73 | $tar->chmod('/tmp', '1777'); |
39713df4 | 74 | |
642eb381 | 75 | $tar->write('files.tar'); # plain tar |
bef46b70 JB |
76 | $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed |
77 | $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed | |
39713df4 RGS |
78 | |
79 | =head1 DESCRIPTION | |
80 | ||
81 | Archive::Tar provides an object oriented mechanism for handling tar | |
82 | files. It provides class methods for quick and easy files handling | |
83 | while also allowing for the creation of tar file objects for custom | |
84 | manipulation. If you have the IO::Zlib module installed, | |
85 | Archive::Tar will also support compressed or gzipped tar files. | |
86 | ||
87 | An object of class Archive::Tar represents a .tar(.gz) archive full | |
88 | of files and things. | |
89 | ||
90 | =head1 Object Methods | |
91 | ||
92 | =head2 Archive::Tar->new( [$file, $compressed] ) | |
93 | ||
94 | Returns a new Tar object. If given any arguments, C<new()> calls the | |
95 | C<read()> method automatically, passing on the arguments provided to | |
96 | the C<read()> method. | |
97 | ||
98 | If C<new()> is invoked with arguments and the C<read()> method fails | |
99 | for any reason, C<new()> returns undef. | |
100 | ||
101 | =cut | |
102 | ||
103 | my $tmpl = { | |
104 | _data => [ ], | |
105 | _file => 'Unknown', | |
106 | }; | |
107 | ||
108 | ### install get/set accessors for this object. | |
109 | for my $key ( keys %$tmpl ) { | |
110 | no strict 'refs'; | |
111 | *{__PACKAGE__."::$key"} = sub { | |
112 | my $self = shift; | |
113 | $self->{$key} = $_[0] if @_; | |
114 | return $self->{$key}; | |
115 | } | |
116 | } | |
117 | ||
118 | sub new { | |
119 | my $class = shift; | |
120 | $class = ref $class if ref $class; | |
121 | ||
122 | ### copying $tmpl here since a shallow copy makes it use the | |
123 | ### same aref, causing for files to remain in memory always. | |
941cb2bb | 124 | my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class; |
39713df4 RGS |
125 | |
126 | if (@_) { | |
81a5970e RGS |
127 | unless ( $obj->read( @_ ) ) { |
128 | $obj->_error(qq[No data could be read from file]); | |
129 | return; | |
130 | } | |
39713df4 RGS |
131 | } |
132 | ||
133 | return $obj; | |
134 | } | |
135 | ||
642eb381 | 136 | =head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] ) |
39713df4 RGS |
137 | |
138 | Read the given tar file into memory. | |
139 | The first argument can either be the name of a file or a reference to | |
140 | an already open filehandle (or an IO::Zlib object if it's compressed) | |
39713df4 RGS |
141 | |
142 | The C<read> will I<replace> any previous content in C<$tar>! | |
143 | ||
e0d68803 | 144 | The second argument may be considered optional, but remains for |
642eb381 SH |
145 | backwards compatibility. Archive::Tar now looks at the file |
146 | magic to determine what class should be used to open the file | |
147 | and will transparently Do The Right Thing. | |
148 | ||
149 | Archive::Tar will warn if you try to pass a bzip2 compressed file and the | |
150 | IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return. | |
39713df4 | 151 | |
b3200c5d | 152 | Note that you can currently B<not> pass a C<gzip> compressed |
642eb381 SH |
153 | filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed |
154 | filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string | |
b3200c5d SP |
155 | containing the full archive information (either compressed or |
156 | uncompressed). These are worth while features, but not currently | |
157 | implemented. See the C<TODO> section. | |
158 | ||
39713df4 RGS |
159 | The third argument can be a hash reference with options. Note that |
160 | all options are case-sensitive. | |
161 | ||
162 | =over 4 | |
163 | ||
164 | =item limit | |
165 | ||
166 | Do not read more than C<limit> files. This is useful if you have | |
167 | very big archives, and are only interested in the first few files. | |
168 | ||
642eb381 SH |
169 | =item filter |
170 | ||
171 | Can be set to a regular expression. Only files with names that match | |
172 | the expression will be read. | |
173 | ||
f8c9502f CBW |
174 | =item md5 |
175 | ||
176 | Set to 1 and the md5sum of files will be returned (instead of file data) | |
177 | my $iter = Archive::Tar->iter( $file, 1, {md5 => 1} ); | |
178 | while( my $f = $iter->() ) { | |
179 | print $f->data . "\t" . $f->full_path . $/; | |
180 | } | |
181 | ||
39713df4 RGS |
182 | =item extract |
183 | ||
184 | If set to true, immediately extract entries when reading them. This | |
185 | gives you the same memory break as the C<extract_archive> function. | |
186 | Note however that entries will not be read into memory, but written | |
e0d68803 | 187 | straight to disk. This means no C<Archive::Tar::File> objects are |
642eb381 | 188 | created for you to inspect. |
39713df4 RGS |
189 | |
190 | =back | |
191 | ||
192 | All files are stored internally as C<Archive::Tar::File> objects. | |
193 | Please consult the L<Archive::Tar::File> documentation for details. | |
194 | ||
195 | Returns the number of files read in scalar context, and a list of | |
196 | C<Archive::Tar::File> objects in list context. | |
197 | ||
198 | =cut | |
199 | ||
200 | sub read { | |
201 | my $self = shift; | |
202 | my $file = shift; | |
203 | my $gzip = shift || 0; | |
204 | my $opts = shift || {}; | |
205 | ||
206 | unless( defined $file ) { | |
207 | $self->_error( qq[No file to read from!] ); | |
208 | return; | |
209 | } else { | |
210 | $self->_file( $file ); | |
211 | } | |
212 | ||
213 | my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) ) | |
214 | or return; | |
215 | ||
216 | my $data = $self->_read_tar( $handle, $opts ) or return; | |
217 | ||
218 | $self->_data( $data ); | |
219 | ||
220 | return wantarray ? @$data : scalar @$data; | |
221 | } | |
222 | ||
223 | sub _get_handle { | |
642eb381 SH |
224 | my $self = shift; |
225 | my $file = shift; return unless defined $file; | |
642eb381 SH |
226 | my $compress = shift || 0; |
227 | my $mode = shift || READ_ONLY->( ZLIB ); # default to read only | |
228 | ||
afabe0e8 CBW |
229 | ### Check if file is a file handle or IO glob |
230 | if ( ref $file ) { | |
231 | return $file if eval{ *$file{IO} }; | |
232 | return $file if eval{ $file->isa(q{IO::Handle}) }; | |
233 | $file = q{}.$file; | |
234 | } | |
642eb381 SH |
235 | |
236 | ### get a FH opened to the right class, so we can use it transparently | |
237 | ### throughout the program | |
238 | my $fh; | |
239 | { ### reading magic only makes sense if we're opening a file for | |
240 | ### reading. otherwise, just use what the user requested. | |
241 | my $magic = ''; | |
242 | if( MODE_READ->($mode) ) { | |
243 | open my $tmp, $file or do { | |
244 | $self->_error( qq[Could not open '$file' for reading: $!] ); | |
245 | return; | |
246 | }; | |
e0d68803 | 247 | |
642eb381 SH |
248 | ### read the first 4 bites of the file to figure out which class to |
249 | ### use to open the file. | |
e0d68803 | 250 | sysread( $tmp, $magic, 4 ); |
642eb381 SH |
251 | close $tmp; |
252 | } | |
39713df4 | 253 | |
642eb381 SH |
254 | ### is it bzip? |
255 | ### if you asked specifically for bzip compression, or if we're in | |
256 | ### read mode and the magic numbers add up, use bzip | |
257 | if( BZIP and ( | |
e0d68803 | 258 | ($compress eq COMPRESS_BZIP) or |
642eb381 SH |
259 | ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM ) |
260 | ) | |
261 | ) { | |
e0d68803 | 262 | |
642eb381 SH |
263 | ### different reader/writer modules, different error vars... sigh |
264 | if( MODE_READ->($mode) ) { | |
265 | $fh = IO::Uncompress::Bunzip2->new( $file ) or do { | |
266 | $self->_error( qq[Could not read '$file': ] . | |
267 | $IO::Uncompress::Bunzip2::Bunzip2Error | |
268 | ); | |
269 | return; | |
270 | }; | |
e0d68803 | 271 | |
642eb381 SH |
272 | } else { |
273 | $fh = IO::Compress::Bzip2->new( $file ) or do { | |
274 | $self->_error( qq[Could not write to '$file': ] . | |
275 | $IO::Compress::Bzip2::Bzip2Error | |
276 | ); | |
277 | return; | |
278 | }; | |
279 | } | |
e0d68803 | 280 | |
642eb381 SH |
281 | ### is it gzip? |
282 | ### if you asked for compression, if you wanted to read or the gzip | |
283 | ### magic number is present (redundant with read) | |
284 | } elsif( ZLIB and ( | |
285 | $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM | |
e0d68803 | 286 | ) |
642eb381 SH |
287 | ) { |
288 | $fh = IO::Zlib->new; | |
39713df4 | 289 | |
642eb381 SH |
290 | unless( $fh->open( $file, $mode ) ) { |
291 | $self->_error(qq[Could not create filehandle for '$file': $!]); | |
292 | return; | |
293 | } | |
e0d68803 | 294 | |
642eb381 | 295 | ### is it plain tar? |
39713df4 | 296 | } else { |
642eb381 | 297 | $fh = IO::File->new; |
39713df4 | 298 | |
642eb381 SH |
299 | unless( $fh->open( $file, $mode ) ) { |
300 | $self->_error(qq[Could not create filehandle for '$file': $!]); | |
301 | return; | |
302 | } | |
39713df4 | 303 | |
642eb381 SH |
304 | ### enable bin mode on tar archives |
305 | binmode $fh; | |
e0d68803 | 306 | } |
642eb381 | 307 | } |
39713df4 RGS |
308 | |
309 | return $fh; | |
310 | } | |
311 | ||
642eb381 | 312 | |
39713df4 RGS |
313 | sub _read_tar { |
314 | my $self = shift; | |
315 | my $handle = shift or return; | |
316 | my $opts = shift || {}; | |
317 | ||
318 | my $count = $opts->{limit} || 0; | |
642eb381 | 319 | my $filter = $opts->{filter}; |
f8c9502f | 320 | my $md5 = $opts->{md5} || 0; # cdrake |
afabe0e8 | 321 | my $filter_cb = $opts->{filter_cb}; |
39713df4 RGS |
322 | my $extract = $opts->{extract} || 0; |
323 | ||
324 | ### set a cap on the amount of files to extract ### | |
325 | my $limit = 0; | |
326 | $limit = 1 if $count > 0; | |
327 | ||
328 | my $tarfile = [ ]; | |
329 | my $chunk; | |
330 | my $read = 0; | |
331 | my $real_name; # to set the name of a file when | |
332 | # we're encountering @longlink | |
333 | my $data; | |
334 | ||
335 | LOOP: | |
336 | while( $handle->read( $chunk, HEAD ) ) { | |
337 | ### IO::Zlib doesn't support this yet | |
338 | my $offset = eval { tell $handle } || 'unknown'; | |
d33cd7cf | 339 | $@ = ''; |
39713df4 RGS |
340 | |
341 | unless( $read++ ) { | |
342 | my $gzip = GZIP_MAGIC_NUM; | |
343 | if( $chunk =~ /$gzip/ ) { | |
344 | $self->_error( qq[Cannot read compressed format in tar-mode] ); | |
345 | return; | |
346 | } | |
93e94d8a | 347 | |
4feb3b72 JB |
348 | ### size is < HEAD, which means a corrupted file, as the minimum |
349 | ### length is _at least_ HEAD | |
350 | if (length $chunk != HEAD) { | |
351 | $self->_error( qq[Cannot read enough bytes from the tarfile] ); | |
352 | return; | |
353 | } | |
39713df4 RGS |
354 | } |
355 | ||
356 | ### if we can't read in all bytes... ### | |
357 | last if length $chunk != HEAD; | |
358 | ||
359 | ### Apparently this should really be two blocks of 512 zeroes, | |
360 | ### but GNU tar sometimes gets it wrong. See comment in the | |
361 | ### source code (tar.c) to GNU cpio. | |
362 | next if $chunk eq TAR_END; | |
363 | ||
b30bcf62 RGS |
364 | ### according to the posix spec, the last 12 bytes of the header are |
365 | ### null bytes, to pad it to a 512 byte block. That means if these | |
eadbb00b | 366 | ### bytes are NOT null bytes, it's a corrupt header. See: |
b30bcf62 RGS |
367 | ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx |
368 | ### line 111 | |
369 | { my $nulls = join '', "\0" x 12; | |
370 | unless( $nulls eq substr( $chunk, 500, 12 ) ) { | |
371 | $self->_error( qq[Invalid header block at offset $offset] ); | |
372 | next LOOP; | |
373 | } | |
374 | } | |
375 | ||
81a5970e RGS |
376 | ### pass the realname, so we can set it 'proper' right away |
377 | ### some of the heuristics are done on the name, so important | |
378 | ### to set it ASAP | |
39713df4 | 379 | my $entry; |
81a5970e RGS |
380 | { my %extra_args = (); |
381 | $extra_args{'name'} = $$real_name if defined $real_name; | |
e0d68803 JB |
382 | |
383 | unless( $entry = Archive::Tar::File->new( chunk => $chunk, | |
384 | %extra_args ) | |
81a5970e RGS |
385 | ) { |
386 | $self->_error( qq[Couldn't read chunk at offset $offset] ); | |
b30bcf62 | 387 | next LOOP; |
81a5970e | 388 | } |
39713df4 RGS |
389 | } |
390 | ||
391 | ### ignore labels: | |
d33cd7cf | 392 | ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159 |
39713df4 RGS |
393 | next if $entry->is_label; |
394 | ||
395 | if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) { | |
396 | ||
397 | if ( $entry->is_file && !$entry->validate ) { | |
398 | ### sometimes the chunk is rather fux0r3d and a whole 512 | |
c3745331 | 399 | ### bytes ends up in the ->name area. |
39713df4 RGS |
400 | ### clean it up, if need be |
401 | my $name = $entry->name; | |
402 | $name = substr($name, 0, 100) if length $name > 100; | |
403 | $name =~ s/\n/ /g; | |
404 | ||
405 | $self->_error( $name . qq[: checksum error] ); | |
406 | next LOOP; | |
407 | } | |
408 | ||
409 | my $block = BLOCK_SIZE->( $entry->size ); | |
410 | ||
411 | $data = $entry->get_content_by_ref; | |
412 | ||
afabe0e8 | 413 | my $skip = 0; |
f8c9502f | 414 | my $ctx; # cdrake |
afabe0e8 | 415 | ### skip this entry if we're filtering |
f8c9502f CBW |
416 | |
417 | if($md5) { # cdrake | |
418 | $ctx = Digest::MD5->new; # cdrake | |
419 | $skip=5; # cdrake | |
420 | ||
421 | } elsif ($filter && $entry->name !~ $filter) { | |
afabe0e8 CBW |
422 | $skip = 1; |
423 | ||
424 | ### skip this entry if it's a pax header. This is a special file added | |
425 | ### by, among others, git-generated tarballs. It holds comments and is | |
426 | ### not meant for extracting. See #38932: pax_global_header extracted | |
c3e6accb | 427 | } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { |
afabe0e8 CBW |
428 | $skip = 2; |
429 | } elsif ($filter_cb && ! $filter_cb->($entry)) { | |
430 | $skip = 3; | |
431 | } | |
432 | ||
433 | if ($skip) { | |
434 | # | |
435 | # Since we're skipping, do not allocate memory for the | |
93e94d8a | 436 | # whole file. Read it 64 BLOCKS at a time. Do not |
afabe0e8 CBW |
437 | # complete the skip yet because maybe what we read is a |
438 | # longlink and it won't get skipped after all | |
439 | # | |
440 | my $amt = $block; | |
f8c9502f | 441 | my $fsz=$entry->size; # cdrake |
afabe0e8 CBW |
442 | while ($amt > 0) { |
443 | $$data = ''; | |
444 | my $this = 64 * BLOCK; | |
445 | $this = $amt if $this > $amt; | |
446 | if( $handle->read( $$data, $this ) < $this ) { | |
447 | $self->_error( qq[Read error on tarfile (missing data) ']. | |
448 | $entry->full_path ."' at offset $offset" ); | |
449 | next LOOP; | |
450 | } | |
451 | $amt -= $this; | |
f8c9502f CBW |
452 | $fsz -= $this; # cdrake |
453 | substr ($$data, $fsz) = "" if ($fsz<0); # remove external junk prior to md5 # cdrake | |
454 | $ctx->add($$data) if($skip==5); # cdrake | |
afabe0e8 | 455 | } |
f8c9502f | 456 | $$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ; # cdrake |
afabe0e8 CBW |
457 | } else { |
458 | ||
459 | ### just read everything into memory | |
460 | ### can't do lazy loading since IO::Zlib doesn't support 'seek' | |
461 | ### this is because Compress::Zlib doesn't support it =/ | |
462 | ### this reads in the whole data in one read() call. | |
463 | if ( $handle->read( $$data, $block ) < $block ) { | |
464 | $self->_error( qq[Read error on tarfile (missing data) ']. | |
39713df4 | 465 | $entry->full_path ."' at offset $offset" ); |
afabe0e8 CBW |
466 | next LOOP; |
467 | } | |
468 | ### throw away trailing garbage ### | |
469 | substr ($$data, $entry->size) = "" if defined $$data; | |
39713df4 RGS |
470 | } |
471 | ||
39713df4 RGS |
472 | ### part II of the @LongLink munging -- need to do /after/ |
473 | ### the checksum check. | |
474 | if( $entry->is_longlink ) { | |
475 | ### weird thing in tarfiles -- if the file is actually a | |
476 | ### @LongLink, the data part seems to have a trailing ^@ | |
477 | ### (unprintable) char. to display, pipe output through less. | |
478 | ### but that doesn't *always* happen.. so check if the last | |
479 | ### character is a control character, and if so remove it | |
480 | ### at any rate, we better remove that character here, or tests | |
481 | ### like 'eq' and hashlook ups based on names will SO not work | |
482 | ### remove it by calculating the proper size, and then | |
483 | ### tossing out everything that's longer than that size. | |
484 | ||
485 | ### count number of nulls | |
486 | my $nulls = $$data =~ tr/\0/\0/; | |
487 | ||
488 | ### cut data + size by that many bytes | |
489 | $entry->size( $entry->size - $nulls ); | |
490 | substr ($$data, $entry->size) = ""; | |
491 | } | |
492 | } | |
493 | ||
494 | ### clean up of the entries.. posix tar /apparently/ has some | |
495 | ### weird 'feature' that allows for filenames > 255 characters | |
496 | ### they'll put a header in with as name '././@LongLink' and the | |
497 | ### contents will be the name of the /next/ file in the archive | |
498 | ### pretty crappy and kludgy if you ask me | |
499 | ||
500 | ### set the name for the next entry if this is a @LongLink; | |
501 | ### this is one ugly hack =/ but needed for direct extraction | |
502 | if( $entry->is_longlink ) { | |
503 | $real_name = $data; | |
b30bcf62 | 504 | next LOOP; |
39713df4 RGS |
505 | } elsif ( defined $real_name ) { |
506 | $entry->name( $$real_name ); | |
507 | $entry->prefix(''); | |
508 | undef $real_name; | |
509 | } | |
510 | ||
afabe0e8 CBW |
511 | if ($filter && $entry->name !~ $filter) { |
512 | next LOOP; | |
e0d68803 | 513 | |
afabe0e8 CBW |
514 | ### skip this entry if it's a pax header. This is a special file added |
515 | ### by, among others, git-generated tarballs. It holds comments and is | |
516 | ### not meant for extracting. See #38932: pax_global_header extracted | |
c3e6accb | 517 | } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { |
afabe0e8 CBW |
518 | next LOOP; |
519 | } elsif ($filter_cb && ! $filter_cb->($entry)) { | |
520 | next LOOP; | |
521 | } | |
e0d68803 | 522 | |
d33cd7cf CBW |
523 | if ( $extract && !$entry->is_longlink |
524 | && !$entry->is_unknown | |
525 | && !$entry->is_label ) { | |
526 | $self->_extract_file( $entry ) or return; | |
527 | } | |
39713df4 RGS |
528 | |
529 | ### Guard against tarfiles with garbage at the end | |
530 | last LOOP if $entry->name eq ''; | |
531 | ||
532 | ### push only the name on the rv if we're extracting | |
533 | ### -- for extract_archive | |
534 | push @$tarfile, ($extract ? $entry->name : $entry); | |
535 | ||
536 | if( $limit ) { | |
537 | $count-- unless $entry->is_longlink || $entry->is_dir; | |
538 | last LOOP unless $count; | |
539 | } | |
540 | } continue { | |
541 | undef $data; | |
542 | } | |
543 | ||
544 | return $tarfile; | |
545 | } | |
546 | ||
547 | =head2 $tar->contains_file( $filename ) | |
548 | ||
549 | Check if the archive contains a certain file. | |
550 | It will return true if the file is in the archive, false otherwise. | |
551 | ||
552 | Note however, that this function does an exact match using C<eq> | |
553 | on the full path. So it cannot compensate for case-insensitive file- | |
554 | systems or compare 2 paths to see if they would point to the same | |
555 | underlying file. | |
556 | ||
557 | =cut | |
558 | ||
559 | sub contains_file { | |
560 | my $self = shift; | |
01d11a1c | 561 | my $full = shift; |
e0d68803 | 562 | |
01d11a1c | 563 | return unless defined $full; |
39713df4 | 564 | |
c3745331 RGS |
565 | ### don't warn if the entry isn't there.. that's what this function |
566 | ### is for after all. | |
567 | local $WARN = 0; | |
39713df4 RGS |
568 | return 1 if $self->_find_entry($full); |
569 | return; | |
570 | } | |
571 | ||
572 | =head2 $tar->extract( [@filenames] ) | |
573 | ||
574 | Write files whose names are equivalent to any of the names in | |
575 | C<@filenames> to disk, creating subdirectories as necessary. This | |
576 | might not work too well under VMS. | |
577 | Under MacPerl, the file's modification time will be converted to the | |
578 | MacOS zero of time, and appropriate conversions will be done to the | |
579 | path. However, the length of each element of the path is not | |
580 | inspected to see whether it's longer than MacOS currently allows (32 | |
581 | characters). | |
582 | ||
583 | If C<extract> is called without a list of file names, the entire | |
584 | contents of the archive are extracted. | |
585 | ||
586 | Returns a list of filenames extracted. | |
587 | ||
588 | =cut | |
589 | ||
590 | sub extract { | |
591 | my $self = shift; | |
b30bcf62 | 592 | my @args = @_; |
39713df4 RGS |
593 | my @files; |
594 | ||
f38c1908 SP |
595 | # use the speed optimization for all extracted files |
596 | local($self->{cwd}) = cwd() unless $self->{cwd}; | |
597 | ||
eadbb00b | 598 | ### you requested the extraction of only certain files |
b30bcf62 RGS |
599 | if( @args ) { |
600 | for my $file ( @args ) { | |
e0d68803 | 601 | |
b30bcf62 RGS |
602 | ### it's already an object? |
603 | if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) { | |
604 | push @files, $file; | |
605 | next; | |
39713df4 | 606 | |
b30bcf62 RGS |
607 | ### go find it then |
608 | } else { | |
e0d68803 | 609 | |
b30bcf62 RGS |
610 | my $found; |
611 | for my $entry ( @{$self->_data} ) { | |
612 | next unless $file eq $entry->full_path; | |
e0d68803 | 613 | |
b30bcf62 RGS |
614 | ### we found the file you're looking for |
615 | push @files, $entry; | |
616 | $found++; | |
617 | } | |
e0d68803 | 618 | |
b30bcf62 | 619 | unless( $found ) { |
e0d68803 | 620 | return $self->_error( |
b30bcf62 RGS |
621 | qq[Could not find '$file' in archive] ); |
622 | } | |
39713df4 RGS |
623 | } |
624 | } | |
625 | ||
626 | ### just grab all the file items | |
627 | } else { | |
628 | @files = $self->get_files; | |
629 | } | |
630 | ||
631 | ### nothing found? that's an error | |
632 | unless( scalar @files ) { | |
633 | $self->_error( qq[No files found for ] . $self->_file ); | |
634 | return; | |
635 | } | |
636 | ||
637 | ### now extract them | |
638 | for my $entry ( @files ) { | |
639 | unless( $self->_extract_file( $entry ) ) { | |
640 | $self->_error(q[Could not extract ']. $entry->full_path .q['] ); | |
641 | return; | |
642 | } | |
643 | } | |
644 | ||
645 | return @files; | |
646 | } | |
647 | ||
648 | =head2 $tar->extract_file( $file, [$extract_path] ) | |
649 | ||
650 | Write an entry, whose name is equivalent to the file name provided to | |
48e76d2d | 651 | disk. Optionally takes a second parameter, which is the full native |
39713df4 RGS |
652 | path (including filename) the entry will be written to. |
653 | ||
654 | For example: | |
655 | ||
656 | $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' ); | |
657 | ||
b30bcf62 RGS |
658 | $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' ); |
659 | ||
39713df4 RGS |
660 | Returns true on success, false on failure. |
661 | ||
662 | =cut | |
663 | ||
664 | sub extract_file { | |
665 | my $self = shift; | |
01d11a1c | 666 | my $file = shift; return unless defined $file; |
39713df4 RGS |
667 | my $alt = shift; |
668 | ||
669 | my $entry = $self->_find_entry( $file ) | |
670 | or $self->_error( qq[Could not find an entry for '$file'] ), return; | |
671 | ||
672 | return $self->_extract_file( $entry, $alt ); | |
673 | } | |
674 | ||
675 | sub _extract_file { | |
676 | my $self = shift; | |
677 | my $entry = shift or return; | |
678 | my $alt = shift; | |
39713df4 RGS |
679 | |
680 | ### you wanted an alternate extraction location ### | |
681 | my $name = defined $alt ? $alt : $entry->full_path; | |
682 | ||
683 | ### splitpath takes a bool at the end to indicate | |
684 | ### that it's splitting a dir | |
7f10f74b SH |
685 | my ($vol,$dirs,$file); |
686 | if ( defined $alt ) { # It's a local-OS path | |
687 | ($vol,$dirs,$file) = File::Spec->splitpath( $alt, | |
688 | $entry->is_dir ); | |
689 | } else { | |
690 | ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name, | |
691 | $entry->is_dir ); | |
692 | } | |
693 | ||
39713df4 RGS |
694 | my $dir; |
695 | ### is $name an absolute path? ### | |
642eb381 | 696 | if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) { |
178aef9a RGS |
697 | |
698 | ### absolute names are not allowed to be in tarballs under | |
699 | ### strict mode, so only allow it if a user tells us to do it | |
700 | if( not defined $alt and not $INSECURE_EXTRACT_MODE ) { | |
e0d68803 | 701 | $self->_error( |
178aef9a RGS |
702 | q[Entry ']. $entry->full_path .q[' is an absolute path. ]. |
703 | q[Not extracting absolute paths under SECURE EXTRACT MODE] | |
e0d68803 | 704 | ); |
178aef9a RGS |
705 | return; |
706 | } | |
e0d68803 | 707 | |
178aef9a | 708 | ### user asked us to, it's fine. |
642eb381 | 709 | $dir = File::Spec->catpath( $vol, $dirs, "" ); |
39713df4 RGS |
710 | |
711 | ### it's a relative path ### | |
712 | } else { | |
e0d68803 JB |
713 | my $cwd = (ref $self and defined $self->{cwd}) |
714 | ? $self->{cwd} | |
642eb381 | 715 | : cwd(); |
f5afd28d | 716 | |
f5afd28d NC |
717 | my @dirs = defined $alt |
718 | ? File::Spec->splitdir( $dirs ) # It's a local-OS path | |
719 | : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely | |
720 | # straight from the tarball | |
178aef9a | 721 | |
e0d68803 JB |
722 | if( not defined $alt and |
723 | not $INSECURE_EXTRACT_MODE | |
724 | ) { | |
642eb381 SH |
725 | |
726 | ### paths that leave the current directory are not allowed under | |
727 | ### strict mode, so only allow it if a user tells us to do this. | |
728 | if( grep { $_ eq '..' } @dirs ) { | |
e0d68803 | 729 | |
642eb381 SH |
730 | $self->_error( |
731 | q[Entry ']. $entry->full_path .q[' is attempting to leave ]. | |
732 | q[the current working directory. Not extracting under ]. | |
733 | q[SECURE EXTRACT MODE] | |
734 | ); | |
735 | return; | |
e0d68803 JB |
736 | } |
737 | ||
642eb381 SH |
738 | ### the archive may be asking us to extract into a symlink. This |
739 | ### is not sane and a possible security issue, as outlined here: | |
740 | ### https://rt.cpan.org/Ticket/Display.html?id=30380 | |
741 | ### https://bugzilla.redhat.com/show_bug.cgi?id=295021 | |
742 | ### https://issues.rpath.com/browse/RPL-1716 | |
743 | my $full_path = $cwd; | |
744 | for my $d ( @dirs ) { | |
745 | $full_path = File::Spec->catdir( $full_path, $d ); | |
e0d68803 | 746 | |
642eb381 SH |
747 | ### we've already checked this one, and it's safe. Move on. |
748 | next if ref $self and $self->{_link_cache}->{$full_path}; | |
749 | ||
750 | if( -l $full_path ) { | |
751 | my $to = readlink $full_path; | |
752 | my $diag = "symlinked directory ($full_path => $to)"; | |
753 | ||
754 | $self->_error( | |
755 | q[Entry ']. $entry->full_path .q[' is attempting to ]. | |
756 | qq[extract to a $diag. This is considered a security ]. | |
757 | q[vulnerability and not allowed under SECURE EXTRACT ]. | |
758 | q[MODE] | |
759 | ); | |
760 | return; | |
761 | } | |
e0d68803 | 762 | |
642eb381 SH |
763 | ### XXX keep a cache if possible, so the stats become cheaper: |
764 | $self->{_link_cache}->{$full_path} = 1 if ref $self; | |
765 | } | |
766 | } | |
767 | ||
2610e7a4 JB |
768 | ### '.' is the directory delimiter on VMS, which has to be escaped |
769 | ### or changed to '_' on vms. vmsify is used, because older versions | |
770 | ### of vmspath do not handle this properly. | |
771 | ### Must not add a '/' to an empty directory though. | |
e0d68803 | 772 | map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS; |
f5afd28d | 773 | |
e0d68803 | 774 | my ($cwd_vol,$cwd_dir,$cwd_file) |
48e76d2d CB |
775 | = File::Spec->splitpath( $cwd ); |
776 | my @cwd = File::Spec->splitdir( $cwd_dir ); | |
777 | push @cwd, $cwd_file if length $cwd_file; | |
81a5970e | 778 | |
eadbb00b | 779 | ### We need to pass '' as the last element to catpath. Craig Berry |
f5afd28d | 780 | ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>): |
e0d68803 | 781 | ### The root problem is that splitpath on UNIX always returns the |
f5afd28d NC |
782 | ### final path element as a file even if it is a directory, and of |
783 | ### course there is no way it can know the difference without checking | |
784 | ### against the filesystem, which it is documented as not doing. When | |
785 | ### you turn around and call catpath, on VMS you have to know which bits | |
786 | ### are directory bits and which bits are file bits. In this case we | |
787 | ### know the result should be a directory. I had thought you could omit | |
788 | ### the file argument to catpath in such a case, but apparently on UNIX | |
789 | ### you can't. | |
e0d68803 JB |
790 | $dir = File::Spec->catpath( |
791 | $cwd_vol, File::Spec->catdir( @cwd, @dirs ), '' | |
f5afd28d NC |
792 | ); |
793 | ||
e0d68803 | 794 | ### catdir() returns undef if the path is longer than 255 chars on |
2610e7a4 | 795 | ### older VMS systems. |
81a5970e RGS |
796 | unless ( defined $dir ) { |
797 | $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] ); | |
798 | return; | |
799 | } | |
800 | ||
39713df4 RGS |
801 | } |
802 | ||
803 | if( -e $dir && !-d _ ) { | |
804 | $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] ); | |
805 | return; | |
806 | } | |
807 | ||
808 | unless ( -d _ ) { | |
809 | eval { File::Path::mkpath( $dir, 0, 0777 ) }; | |
810 | if( $@ ) { | |
642eb381 SH |
811 | my $fp = $entry->full_path; |
812 | $self->_error(qq[Could not create directory '$dir' for '$fp': $@]); | |
39713df4 RGS |
813 | return; |
814 | } | |
e0d68803 | 815 | |
c3745331 RGS |
816 | ### XXX chown here? that might not be the same as in the archive |
817 | ### as we're only chown'ing to the owner of the file we're extracting | |
818 | ### not to the owner of the directory itself, which may or may not | |
819 | ### be another entry in the archive | |
820 | ### Answer: no, gnu tar doesn't do it either, it'd be the wrong | |
821 | ### way to go. | |
822 | #if( $CHOWN && CAN_CHOWN ) { | |
823 | # chown $entry->uid, $entry->gid, $dir or | |
824 | # $self->_error( qq[Could not set uid/gid on '$dir'] ); | |
825 | #} | |
39713df4 RGS |
826 | } |
827 | ||
828 | ### we're done if we just needed to create a dir ### | |
829 | return 1 if $entry->is_dir; | |
830 | ||
831 | my $full = File::Spec->catfile( $dir, $file ); | |
832 | ||
833 | if( $entry->is_unknown ) { | |
834 | $self->_error( qq[Unknown file type for file '$full'] ); | |
835 | return; | |
836 | } | |
837 | ||
838 | if( length $entry->type && $entry->is_file ) { | |
839 | my $fh = IO::File->new; | |
840 | $fh->open( '>' . $full ) or ( | |
841 | $self->_error( qq[Could not open file '$full': $!] ), | |
842 | return | |
843 | ); | |
844 | ||
845 | if( $entry->size ) { | |
846 | binmode $fh; | |
847 | syswrite $fh, $entry->data or ( | |
848 | $self->_error( qq[Could not write data to '$full'] ), | |
849 | return | |
850 | ); | |
851 | } | |
852 | ||
853 | close $fh or ( | |
854 | $self->_error( qq[Could not close file '$full'] ), | |
855 | return | |
856 | ); | |
857 | ||
858 | } else { | |
859 | $self->_make_special_file( $entry, $full ) or return; | |
860 | } | |
861 | ||
642eb381 SH |
862 | ### only update the timestamp if it's not a symlink; that will change the |
863 | ### timestamp of the original. This addresses bug #33669: Could not update | |
864 | ### timestamp warning on symlinks | |
865 | if( not -l $full ) { | |
866 | utime time, $entry->mtime - TIME_OFFSET, $full or | |
867 | $self->_error( qq[Could not update timestamp] ); | |
868 | } | |
39713df4 | 869 | |
2610e7a4 | 870 | if( $CHOWN && CAN_CHOWN->() ) { |
39713df4 RGS |
871 | chown $entry->uid, $entry->gid, $full or |
872 | $self->_error( qq[Could not set uid/gid on '$full'] ); | |
873 | } | |
874 | ||
875 | ### only chmod if we're allowed to, but never chmod symlinks, since they'll | |
876 | ### change the perms on the file they're linking too... | |
877 | if( $CHMOD and not -l $full ) { | |
1c82faa7 JB |
878 | my $mode = $entry->mode; |
879 | unless ($SAME_PERMISSIONS) { | |
880 | $mode &= ~(oct(7000) | umask); | |
881 | } | |
882 | chmod $mode, $full or | |
39713df4 RGS |
883 | $self->_error( qq[Could not chown '$full' to ] . $entry->mode ); |
884 | } | |
885 | ||
886 | return 1; | |
887 | } | |
888 | ||
889 | sub _make_special_file { | |
890 | my $self = shift; | |
891 | my $entry = shift or return; | |
892 | my $file = shift; return unless defined $file; | |
893 | ||
894 | my $err; | |
895 | ||
896 | if( $entry->is_symlink ) { | |
897 | my $fail; | |
898 | if( ON_UNIX ) { | |
899 | symlink( $entry->linkname, $file ) or $fail++; | |
900 | ||
901 | } else { | |
902 | $self->_extract_special_file_as_plain_file( $entry, $file ) | |
903 | or $fail++; | |
904 | } | |
905 | ||
642eb381 SH |
906 | $err = qq[Making symbolic link '$file' to '] . |
907 | $entry->linkname .q[' failed] if $fail; | |
39713df4 RGS |
908 | |
909 | } elsif ( $entry->is_hardlink ) { | |
910 | my $fail; | |
911 | if( ON_UNIX ) { | |
912 | link( $entry->linkname, $file ) or $fail++; | |
913 | ||
914 | } else { | |
915 | $self->_extract_special_file_as_plain_file( $entry, $file ) | |
916 | or $fail++; | |
917 | } | |
918 | ||
919 | $err = qq[Making hard link from '] . $entry->linkname . | |
920 | qq[' to '$file' failed] if $fail; | |
921 | ||
922 | } elsif ( $entry->is_fifo ) { | |
923 | ON_UNIX && !system('mknod', $file, 'p') or | |
924 | $err = qq[Making fifo ']. $entry->name .qq[' failed]; | |
925 | ||
926 | } elsif ( $entry->is_blockdev or $entry->is_chardev ) { | |
927 | my $mode = $entry->is_blockdev ? 'b' : 'c'; | |
928 | ||
929 | ON_UNIX && !system('mknod', $file, $mode, | |
930 | $entry->devmajor, $entry->devminor) or | |
931 | $err = qq[Making block device ']. $entry->name .qq[' (maj=] . | |
932 | $entry->devmajor . qq[ min=] . $entry->devminor . | |
933 | qq[) failed.]; | |
934 | ||
935 | } elsif ( $entry->is_socket ) { | |
936 | ### the original doesn't do anything special for sockets.... ### | |
937 | 1; | |
938 | } | |
939 | ||
940 | return $err ? $self->_error( $err ) : 1; | |
941 | } | |
942 | ||
943 | ### don't know how to make symlinks, let's just extract the file as | |
944 | ### a plain file | |
945 | sub _extract_special_file_as_plain_file { | |
946 | my $self = shift; | |
947 | my $entry = shift or return; | |
948 | my $file = shift; return unless defined $file; | |
949 | ||
950 | my $err; | |
951 | TRY: { | |
952 | my $orig = $self->_find_entry( $entry->linkname ); | |
953 | ||
954 | unless( $orig ) { | |
955 | $err = qq[Could not find file '] . $entry->linkname . | |
956 | qq[' in memory.]; | |
957 | last TRY; | |
958 | } | |
959 | ||
960 | ### clone the entry, make it appear as a normal file ### | |
961 | my $clone = $entry->clone; | |
962 | $clone->_downgrade_to_plainfile; | |
963 | $self->_extract_file( $clone, $file ) or last TRY; | |
964 | ||
965 | return 1; | |
966 | } | |
967 | ||
968 | return $self->_error($err); | |
969 | } | |
970 | ||
971 | =head2 $tar->list_files( [\@properties] ) | |
972 | ||
973 | Returns a list of the names of all the files in the archive. | |
974 | ||
975 | If C<list_files()> is passed an array reference as its first argument | |
976 | it returns a list of hash references containing the requested | |
977 | properties of each file. The following list of properties is | |
978 | supported: name, size, mtime (last modified date), mode, uid, gid, | |
979 | linkname, uname, gname, devmajor, devminor, prefix. | |
980 | ||
981 | Passing an array reference containing only one element, 'name', is | |
982 | special cased to return a list of names rather than a list of hash | |
983 | references, making it equivalent to calling C<list_files> without | |
984 | arguments. | |
985 | ||
986 | =cut | |
987 | ||
988 | sub list_files { | |
989 | my $self = shift; | |
990 | my $aref = shift || [ ]; | |
991 | ||
992 | unless( $self->_data ) { | |
993 | $self->read() or return; | |
994 | } | |
995 | ||
996 | if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) { | |
997 | return map { $_->full_path } @{$self->_data}; | |
998 | } else { | |
999 | ||
1000 | #my @rv; | |
1001 | #for my $obj ( @{$self->_data} ) { | |
1002 | # push @rv, { map { $_ => $obj->$_() } @$aref }; | |
1003 | #} | |
1004 | #return @rv; | |
1005 | ||
1006 | ### this does the same as the above.. just needs a +{ } | |
1007 | ### to make sure perl doesn't confuse it for a block | |
1008 | return map { my $o=$_; | |
1009 | +{ map { $_ => $o->$_() } @$aref } | |
1010 | } @{$self->_data}; | |
1011 | } | |
1012 | } | |
1013 | ||
1014 | sub _find_entry { | |
1015 | my $self = shift; | |
1016 | my $file = shift; | |
1017 | ||
1018 | unless( defined $file ) { | |
1019 | $self->_error( qq[No file specified] ); | |
1020 | return; | |
1021 | } | |
1022 | ||
b30bcf62 RGS |
1023 | ### it's an object already |
1024 | return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' ); | |
1025 | ||
39713df4 RGS |
1026 | for my $entry ( @{$self->_data} ) { |
1027 | my $path = $entry->full_path; | |
1028 | return $entry if $path eq $file; | |
1029 | } | |
1030 | ||
1031 | $self->_error( qq[No such file in archive: '$file'] ); | |
1032 | return; | |
1033 | } | |
1034 | ||
1035 | =head2 $tar->get_files( [@filenames] ) | |
1036 | ||
1037 | Returns the C<Archive::Tar::File> objects matching the filenames | |
1038 | provided. If no filename list was passed, all C<Archive::Tar::File> | |
1039 | objects in the current Tar object are returned. | |
1040 | ||
1041 | Please refer to the C<Archive::Tar::File> documentation on how to | |
1042 | handle these objects. | |
1043 | ||
1044 | =cut | |
1045 | ||
1046 | sub get_files { | |
1047 | my $self = shift; | |
1048 | ||
1049 | return @{ $self->_data } unless @_; | |
1050 | ||
1051 | my @list; | |
1052 | for my $file ( @_ ) { | |
1053 | push @list, grep { defined } $self->_find_entry( $file ); | |
1054 | } | |
1055 | ||
1056 | return @list; | |
1057 | } | |
1058 | ||
1059 | =head2 $tar->get_content( $file ) | |
1060 | ||
1061 | Return the content of the named file. | |
1062 | ||
1063 | =cut | |
1064 | ||
1065 | sub get_content { | |
1066 | my $self = shift; | |
1067 | my $entry = $self->_find_entry( shift ) or return; | |
1068 | ||
1069 | return $entry->data; | |
1070 | } | |
1071 | ||
1072 | =head2 $tar->replace_content( $file, $content ) | |
1073 | ||
1074 | Make the string $content be the content for the file named $file. | |
1075 | ||
1076 | =cut | |
1077 | ||
1078 | sub replace_content { | |
1079 | my $self = shift; | |
1080 | my $entry = $self->_find_entry( shift ) or return; | |
1081 | ||
1082 | return $entry->replace_content( shift ); | |
1083 | } | |
1084 | ||
1085 | =head2 $tar->rename( $file, $new_name ) | |
1086 | ||
1087 | Rename the file of the in-memory archive to $new_name. | |
1088 | ||
1089 | Note that you must specify a Unix path for $new_name, since per tar | |
1090 | standard, all files in the archive must be Unix paths. | |
1091 | ||
1092 | Returns true on success and false on failure. | |
1093 | ||
1094 | =cut | |
1095 | ||
1096 | sub rename { | |
1097 | my $self = shift; | |
1098 | my $file = shift; return unless defined $file; | |
1099 | my $new = shift; return unless defined $new; | |
1100 | ||
1101 | my $entry = $self->_find_entry( $file ) or return; | |
1102 | ||
1103 | return $entry->rename( $new ); | |
1104 | } | |
1105 | ||
1e915955 CBW |
1106 | =head2 $tar->chmod( $file, $mode ) |
1107 | ||
1108 | Change mode of $file to $mode. | |
1109 | ||
1110 | Returns true on success and false on failure. | |
1111 | ||
1112 | =cut | |
1113 | ||
1114 | sub chmod { | |
1115 | my $self = shift; | |
1116 | my $file = shift; return unless defined $file; | |
1117 | my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/; | |
1118 | my @args = ("$mode"); | |
1119 | ||
1120 | my $entry = $self->_find_entry( $file ) or return; | |
1121 | my $x = $entry->chmod( @args ); | |
1122 | return $x; | |
1123 | } | |
1124 | ||
93e94d8a CBW |
1125 | =head2 $tar->chown( $file, $uname [, $gname] ) |
1126 | ||
1127 | Change owner $file to $uname and $gname. | |
1128 | ||
1129 | Returns true on success and false on failure. | |
1130 | ||
1131 | =cut | |
1132 | ||
1133 | sub chown { | |
1134 | my $self = shift; | |
1135 | my $file = shift; return unless defined $file; | |
1136 | my $uname = shift; return unless defined $uname; | |
1137 | my @args = ($uname); | |
1138 | push(@args, shift); | |
1139 | ||
1140 | my $entry = $self->_find_entry( $file ) or return; | |
1141 | my $x = $entry->chown( @args ); | |
1142 | return $x; | |
1143 | } | |
1144 | ||
39713df4 RGS |
1145 | =head2 $tar->remove (@filenamelist) |
1146 | ||
1147 | Removes any entries with names matching any of the given filenames | |
1148 | from the in-memory archive. Returns a list of C<Archive::Tar::File> | |
1149 | objects that remain. | |
1150 | ||
1151 | =cut | |
1152 | ||
1153 | sub remove { | |
1154 | my $self = shift; | |
1155 | my @list = @_; | |
1156 | ||
1157 | my %seen = map { $_->full_path => $_ } @{$self->_data}; | |
1158 | delete $seen{ $_ } for @list; | |
1159 | ||
1160 | $self->_data( [values %seen] ); | |
1161 | ||
1162 | return values %seen; | |
1163 | } | |
1164 | ||
1165 | =head2 $tar->clear | |
1166 | ||
1167 | C<clear> clears the current in-memory archive. This effectively gives | |
1168 | you a 'blank' object, ready to be filled again. Note that C<clear> | |
1169 | only has effect on the object, not the underlying tarfile. | |
1170 | ||
1171 | =cut | |
1172 | ||
1173 | sub clear { | |
1174 | my $self = shift or return; | |
1175 | ||
1176 | $self->_data( [] ); | |
1177 | $self->_file( '' ); | |
1178 | ||
1179 | return 1; | |
1180 | } | |
1181 | ||
1182 | ||
1183 | =head2 $tar->write ( [$file, $compressed, $prefix] ) | |
1184 | ||
1185 | Write the in-memory archive to disk. The first argument can either | |
1186 | be the name of a file or a reference to an already open filehandle (a | |
e0d68803 | 1187 | GLOB reference). |
642eb381 | 1188 | |
e0d68803 | 1189 | The second argument is used to indicate compression. You can either |
642eb381 | 1190 | compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed |
e0d68803 | 1191 | to be the C<gzip> compression level (between 1 and 9), but the use of |
eadbb00b | 1192 | constants is preferred: |
642eb381 SH |
1193 | |
1194 | # write a gzip compressed file | |
bef46b70 | 1195 | $tar->write( 'out.tgz', COMPRESS_GZIP ); |
642eb381 | 1196 | |
e0d68803 | 1197 | # write a bzip compressed file |
bef46b70 | 1198 | $tar->write( 'out.tbz', COMPRESS_BZIP ); |
39713df4 RGS |
1199 | |
1200 | Note that when you pass in a filehandle, the compression argument | |
1201 | is ignored, as all files are printed verbatim to your filehandle. | |
1202 | If you wish to enable compression with filehandles, use an | |
642eb381 | 1203 | C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. |
39713df4 RGS |
1204 | |
1205 | The third argument is an optional prefix. All files will be tucked | |
1206 | away in the directory you specify as prefix. So if you have files | |
1207 | 'a' and 'b' in your archive, and you specify 'foo' as prefix, they | |
1208 | will be written to the archive as 'foo/a' and 'foo/b'. | |
1209 | ||
1210 | If no arguments are given, C<write> returns the entire formatted | |
1211 | archive as a string, which could be useful if you'd like to stuff the | |
1212 | archive into a socket or a pipe to gzip or something. | |
1213 | ||
642eb381 | 1214 | |
39713df4 RGS |
1215 | =cut |
1216 | ||
1217 | sub write { | |
1218 | my $self = shift; | |
1219 | my $file = shift; $file = '' unless defined $file; | |
1220 | my $gzip = shift || 0; | |
1221 | my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; | |
1222 | my $dummy = ''; | |
e0d68803 | 1223 | |
39713df4 RGS |
1224 | ### only need a handle if we have a file to print to ### |
1225 | my $handle = length($file) | |
1226 | ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) ) | |
1227 | or return ) | |
1228 | : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h } | |
e0d68803 | 1229 | : $HAS_IO_STRING ? IO::String->new |
39713df4 RGS |
1230 | : __PACKAGE__->no_string_support(); |
1231 | ||
e0d68803 JB |
1232 | ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a |
1233 | ### corrupt TAR file. Must clear out $\ to make sure no garbage is | |
1234 | ### printed to the archive | |
1235 | local $\; | |
39713df4 RGS |
1236 | |
1237 | for my $entry ( @{$self->_data} ) { | |
1238 | ### entries to be written to the tarfile ### | |
1239 | my @write_me; | |
1240 | ||
1241 | ### only now will we change the object to reflect the current state | |
1242 | ### of the name and prefix fields -- this needs to be limited to | |
1243 | ### write() only! | |
1244 | my $clone = $entry->clone; | |
1245 | ||
1246 | ||
e0d68803 | 1247 | ### so, if you don't want use to use the prefix, we'll stuff |
39713df4 RGS |
1248 | ### everything in the name field instead |
1249 | if( $DO_NOT_USE_PREFIX ) { | |
1250 | ||
1251 | ### you might have an extended prefix, if so, set it in the clone | |
1252 | ### XXX is ::Unix right? | |
1253 | $clone->name( length $ext_prefix | |
1254 | ? File::Spec::Unix->catdir( $ext_prefix, | |
1255 | $clone->full_path) | |
1256 | : $clone->full_path ); | |
1257 | $clone->prefix( '' ); | |
1258 | ||
1259 | ### otherwise, we'll have to set it properly -- prefix part in the | |
1260 | ### prefix and name part in the name field. | |
1261 | } else { | |
1262 | ||
1263 | ### split them here, not before! | |
1264 | my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path ); | |
1265 | ||
1266 | ### you might have an extended prefix, if so, set it in the clone | |
1267 | ### XXX is ::Unix right? | |
1268 | $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix ) | |
1269 | if length $ext_prefix; | |
1270 | ||
1271 | $clone->prefix( $prefix ); | |
1272 | $clone->name( $name ); | |
1273 | } | |
1274 | ||
1275 | ### names are too long, and will get truncated if we don't add a | |
1276 | ### '@LongLink' file... | |
1277 | my $make_longlink = ( length($clone->name) > NAME_LENGTH or | |
1278 | length($clone->prefix) > PREFIX_LENGTH | |
1279 | ) || 0; | |
1280 | ||
1281 | ### perhaps we need to make a longlink file? | |
1282 | if( $make_longlink ) { | |
1283 | my $longlink = Archive::Tar::File->new( | |
1284 | data => LONGLINK_NAME, | |
1285 | $clone->full_path, | |
1286 | { type => LONGLINK } | |
1287 | ); | |
1288 | ||
1289 | unless( $longlink ) { | |
1290 | $self->_error( qq[Could not create 'LongLink' entry for ] . | |
1291 | qq[oversize file '] . $clone->full_path ."'" ); | |
1292 | return; | |
1293 | }; | |
1294 | ||
1295 | push @write_me, $longlink; | |
1296 | } | |
1297 | ||
1298 | push @write_me, $clone; | |
1299 | ||
1300 | ### write the one, optionally 2 a::t::file objects to the handle | |
1301 | for my $clone (@write_me) { | |
1302 | ||
1303 | ### if the file is a symlink, there are 2 options: | |
1304 | ### either we leave the symlink intact, but then we don't write any | |
1305 | ### data OR we follow the symlink, which means we actually make a | |
1306 | ### copy. if we do the latter, we have to change the TYPE of the | |
1307 | ### clone to 'FILE' | |
1308 | my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK; | |
1309 | my $data_ok = !$clone->is_symlink && $clone->has_content; | |
1310 | ||
1311 | ### downgrade to a 'normal' file if it's a symlink we're going to | |
1312 | ### treat as a regular file | |
1313 | $clone->_downgrade_to_plainfile if $link_ok; | |
1314 | ||
1315 | ### get the header for this block | |
1316 | my $header = $self->_format_tar_entry( $clone ); | |
1317 | unless( $header ) { | |
1318 | $self->_error(q[Could not format header for: ] . | |
1319 | $clone->full_path ); | |
1320 | return; | |
1321 | } | |
1322 | ||
1323 | unless( print $handle $header ) { | |
1324 | $self->_error(q[Could not write header for: ] . | |
1325 | $clone->full_path); | |
1326 | return; | |
1327 | } | |
1328 | ||
1329 | if( $link_ok or $data_ok ) { | |
1330 | unless( print $handle $clone->data ) { | |
1331 | $self->_error(q[Could not write data for: ] . | |
1332 | $clone->full_path); | |
1333 | return; | |
1334 | } | |
1335 | ||
1336 | ### pad the end of the clone if required ### | |
1337 | print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK | |
1338 | } | |
1339 | ||
1340 | } ### done writing these entries | |
1341 | } | |
1342 | ||
1343 | ### write the end markers ### | |
1344 | print $handle TAR_END x 2 or | |
1345 | return $self->_error( qq[Could not write tar end markers] ); | |
b30bcf62 | 1346 | |
39713df4 | 1347 | ### did you want it written to a file, or returned as a string? ### |
b30bcf62 | 1348 | my $rv = length($file) ? 1 |
39713df4 | 1349 | : $HAS_PERLIO ? $dummy |
b30bcf62 RGS |
1350 | : do { seek $handle, 0, 0; local $/; <$handle> }; |
1351 | ||
d33cd7cf | 1352 | ### make sure to close the handle if we created it |
afabe0e8 CBW |
1353 | if ( $file ne $handle ) { |
1354 | unless( close $handle ) { | |
1355 | $self->_error( qq[Could not write tar] ); | |
1356 | return; | |
1357 | } | |
1358 | } | |
e0d68803 | 1359 | |
b30bcf62 | 1360 | return $rv; |
39713df4 RGS |
1361 | } |
1362 | ||
1363 | sub _format_tar_entry { | |
1364 | my $self = shift; | |
1365 | my $entry = shift or return; | |
1366 | my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; | |
1367 | my $no_prefix = shift || 0; | |
1368 | ||
1369 | my $file = $entry->name; | |
1370 | my $prefix = $entry->prefix; $prefix = '' unless defined $prefix; | |
1371 | ||
1372 | ### remove the prefix from the file name | |
eadbb00b | 1373 | ### not sure if this is still needed --kane |
39713df4 RGS |
1374 | ### no it's not -- Archive::Tar::File->_new_from_file will take care of |
1375 | ### this for us. Even worse, this would break if we tried to add a file | |
1376 | ### like x/x. | |
1377 | #if( length $prefix ) { | |
1378 | # $file =~ s/^$match//; | |
1379 | #} | |
1380 | ||
1381 | $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix) | |
1382 | if length $ext_prefix; | |
1383 | ||
1384 | ### not sure why this is... ### | |
1385 | my $l = PREFIX_LENGTH; # is ambiguous otherwise... | |
1386 | substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH; | |
1387 | ||
d33cd7cf | 1388 | my $f1 = "%06o"; my $f2 = $ZERO_PAD_NUMBERS ? "%011o" : "%11o"; |
39713df4 RGS |
1389 | |
1390 | ### this might be optimizable with a 'changed' flag in the file objects ### | |
1391 | my $tar = pack ( | |
1392 | PACK, | |
1393 | $file, | |
1394 | ||
1395 | (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]), | |
1396 | (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]), | |
1397 | ||
1398 | "", # checksum field - space padded a bit down | |
1399 | ||
1400 | (map { $entry->$_() } qw[type linkname magic]), | |
1401 | ||
1402 | $entry->version || TAR_VERSION, | |
1403 | ||
1404 | (map { $entry->$_() } qw[uname gname]), | |
1405 | (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]), | |
1406 | ||
1407 | ($no_prefix ? '' : $prefix) | |
1408 | ); | |
1409 | ||
1410 | ### add the checksum ### | |
d33cd7cf | 1411 | my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0"; |
39713df4 RGS |
1412 | substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar)); |
1413 | ||
1414 | return $tar; | |
1415 | } | |
1416 | ||
1417 | =head2 $tar->add_files( @filenamelist ) | |
1418 | ||
1419 | Takes a list of filenames and adds them to the in-memory archive. | |
1420 | ||
1421 | The path to the file is automatically converted to a Unix like | |
1422 | equivalent for use in the archive, and, if on MacOS, the file's | |
1423 | modification time is converted from the MacOS epoch to the Unix epoch. | |
1424 | So tar archives created on MacOS with B<Archive::Tar> can be read | |
1425 | both with I<tar> on Unix and applications like I<suntar> or | |
1426 | I<Stuffit Expander> on MacOS. | |
1427 | ||
1428 | Be aware that the file's type/creator and resource fork will be lost, | |
1429 | which is usually what you want in cross-platform archives. | |
1430 | ||
2610e7a4 JB |
1431 | Instead of a filename, you can also pass it an existing C<Archive::Tar::File> |
1432 | object from, for example, another archive. The object will be clone, and | |
1433 | effectively be a copy of the original, not an alias. | |
1434 | ||
39713df4 RGS |
1435 | Returns a list of C<Archive::Tar::File> objects that were just added. |
1436 | ||
1437 | =cut | |
1438 | ||
1439 | sub add_files { | |
1440 | my $self = shift; | |
1441 | my @files = @_ or return; | |
1442 | ||
1443 | my @rv; | |
1444 | for my $file ( @files ) { | |
2610e7a4 JB |
1445 | |
1446 | ### you passed an Archive::Tar::File object | |
1447 | ### clone it so we don't accidentally have a reference to | |
1448 | ### an object from another archive | |
1449 | if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) { | |
e0d68803 | 1450 | push @rv, $file->clone; |
2610e7a4 JB |
1451 | next; |
1452 | } | |
e0d68803 | 1453 | |
c3745331 | 1454 | unless( -e $file || -l $file ) { |
39713df4 RGS |
1455 | $self->_error( qq[No such file: '$file'] ); |
1456 | next; | |
1457 | } | |
1458 | ||
1459 | my $obj = Archive::Tar::File->new( file => $file ); | |
1460 | unless( $obj ) { | |
1461 | $self->_error( qq[Unable to add file: '$file'] ); | |
1462 | next; | |
1463 | } | |
1464 | ||
1465 | push @rv, $obj; | |
1466 | } | |
1467 | ||
1468 | push @{$self->{_data}}, @rv; | |
1469 | ||
1470 | return @rv; | |
1471 | } | |
1472 | ||
1473 | =head2 $tar->add_data ( $filename, $data, [$opthashref] ) | |
1474 | ||
1475 | Takes a filename, a scalar full of data and optionally a reference to | |
1476 | a hash with specific options. | |
1477 | ||
1478 | Will add a file to the in-memory archive, with name C<$filename> and | |
1479 | content C<$data>. Specific properties can be set using C<$opthashref>. | |
1480 | The following list of properties is supported: name, size, mtime | |
1481 | (last modified date), mode, uid, gid, linkname, uname, gname, | |
b3200c5d | 1482 | devmajor, devminor, prefix, type. (On MacOS, the file's path and |
39713df4 RGS |
1483 | modification times are converted to Unix equivalents.) |
1484 | ||
b3200c5d SP |
1485 | Valid values for the file type are the following constants defined in |
1486 | Archive::Tar::Constants: | |
1487 | ||
1488 | =over 4 | |
1489 | ||
1490 | =item FILE | |
1491 | ||
1492 | Regular file. | |
1493 | ||
1494 | =item HARDLINK | |
1495 | ||
1496 | =item SYMLINK | |
1497 | ||
1498 | Hard and symbolic ("soft") links; linkname should specify target. | |
1499 | ||
1500 | =item CHARDEV | |
1501 | ||
1502 | =item BLOCKDEV | |
1503 | ||
1504 | Character and block devices. devmajor and devminor should specify the major | |
1505 | and minor device numbers. | |
1506 | ||
1507 | =item DIR | |
1508 | ||
1509 | Directory. | |
1510 | ||
1511 | =item FIFO | |
1512 | ||
1513 | FIFO (named pipe). | |
1514 | ||
1515 | =item SOCKET | |
1516 | ||
1517 | Socket. | |
1518 | ||
1519 | =back | |
1520 | ||
39713df4 RGS |
1521 | Returns the C<Archive::Tar::File> object that was just added, or |
1522 | C<undef> on failure. | |
1523 | ||
1524 | =cut | |
1525 | ||
1526 | sub add_data { | |
1527 | my $self = shift; | |
1528 | my ($file, $data, $opt) = @_; | |
1529 | ||
1530 | my $obj = Archive::Tar::File->new( data => $file, $data, $opt ); | |
1531 | unless( $obj ) { | |
1532 | $self->_error( qq[Unable to add file: '$file'] ); | |
1533 | return; | |
1534 | } | |
1535 | ||
1536 | push @{$self->{_data}}, $obj; | |
1537 | ||
1538 | return $obj; | |
1539 | } | |
1540 | ||
1541 | =head2 $tar->error( [$BOOL] ) | |
1542 | ||
1543 | Returns the current errorstring (usually, the last error reported). | |
1544 | If a true value was specified, it will give the C<Carp::longmess> | |
1545 | equivalent of the error, in effect giving you a stacktrace. | |
1546 | ||
1547 | For backwards compatibility, this error is also available as | |
1548 | C<$Archive::Tar::error> although it is much recommended you use the | |
1549 | method call instead. | |
1550 | ||
1551 | =cut | |
1552 | ||
1553 | { | |
1554 | $error = ''; | |
1555 | my $longmess; | |
1556 | ||
1557 | sub _error { | |
1558 | my $self = shift; | |
1559 | my $msg = $error = shift; | |
1560 | $longmess = Carp::longmess($error); | |
941cb2bb CBW |
1561 | if (ref $self) { |
1562 | $self->{_error} = $error; | |
1563 | $self->{_longmess} = $longmess; | |
1564 | } | |
39713df4 RGS |
1565 | |
1566 | ### set Archive::Tar::WARN to 0 to disable printing | |
1567 | ### of errors | |
1568 | if( $WARN ) { | |
1569 | carp $DEBUG ? $longmess : $msg; | |
1570 | } | |
1571 | ||
1572 | return; | |
1573 | } | |
1574 | ||
1575 | sub error { | |
1576 | my $self = shift; | |
941cb2bb CBW |
1577 | if (ref $self) { |
1578 | return shift() ? $self->{_longmess} : $self->{_error}; | |
1579 | } else { | |
1580 | return shift() ? $longmess : $error; | |
1581 | } | |
39713df4 RGS |
1582 | } |
1583 | } | |
1584 | ||
f38c1908 SP |
1585 | =head2 $tar->setcwd( $cwd ); |
1586 | ||
1587 | C<Archive::Tar> needs to know the current directory, and it will run | |
e0d68803 | 1588 | C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the |
f38c1908 | 1589 | tarfile and saves it in the file system. (As of version 1.30, however, |
e0d68803 | 1590 | C<Archive::Tar> will use the speed optimization described below |
f38c1908 SP |
1591 | automatically, so it's only relevant if you're using C<extract_file()>). |
1592 | ||
1593 | Since C<Archive::Tar> doesn't change the current directory internally | |
1594 | while it is extracting the items in a tarball, all calls to C<Cwd::cwd()> | |
1595 | can be avoided if we can guarantee that the current directory doesn't | |
1596 | get changed externally. | |
1597 | ||
1598 | To use this performance boost, set the current directory via | |
1599 | ||
1600 | use Cwd; | |
1601 | $tar->setcwd( cwd() ); | |
1602 | ||
1603 | once before calling a function like C<extract_file> and | |
1604 | C<Archive::Tar> will use the current directory setting from then on | |
e0d68803 | 1605 | and won't call C<Cwd::cwd()> internally. |
f38c1908 SP |
1606 | |
1607 | To switch back to the default behaviour, use | |
1608 | ||
1609 | $tar->setcwd( undef ); | |
1610 | ||
1611 | and C<Archive::Tar> will call C<Cwd::cwd()> internally again. | |
1612 | ||
eadbb00b | 1613 | If you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will |
f38c1908 SP |
1614 | be called for you. |
1615 | ||
e0d68803 | 1616 | =cut |
f38c1908 SP |
1617 | |
1618 | sub setcwd { | |
1619 | my $self = shift; | |
1620 | my $cwd = shift; | |
1621 | ||
1622 | $self->{cwd} = $cwd; | |
1623 | } | |
39713df4 | 1624 | |
39713df4 RGS |
1625 | =head1 Class Methods |
1626 | ||
642eb381 | 1627 | =head2 Archive::Tar->create_archive($file, $compressed, @filelist) |
39713df4 RGS |
1628 | |
1629 | Creates a tar file from the list of files provided. The first | |
1630 | argument can either be the name of the tar file to create or a | |
1631 | reference to an open file handle (e.g. a GLOB reference). | |
1632 | ||
e0d68803 | 1633 | The second argument is used to indicate compression. You can either |
642eb381 | 1634 | compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed |
e0d68803 | 1635 | to be the C<gzip> compression level (between 1 and 9), but the use of |
eadbb00b | 1636 | constants is preferred: |
642eb381 SH |
1637 | |
1638 | # write a gzip compressed file | |
bef46b70 | 1639 | Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist ); |
642eb381 | 1640 | |
e0d68803 | 1641 | # write a bzip compressed file |
bef46b70 | 1642 | Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist ); |
39713df4 RGS |
1643 | |
1644 | Note that when you pass in a filehandle, the compression argument | |
1645 | is ignored, as all files are printed verbatim to your filehandle. | |
1646 | If you wish to enable compression with filehandles, use an | |
642eb381 | 1647 | C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. |
39713df4 RGS |
1648 | |
1649 | The remaining arguments list the files to be included in the tar file. | |
1650 | These files must all exist. Any files which don't exist or can't be | |
1651 | read are silently ignored. | |
1652 | ||
1653 | If the archive creation fails for any reason, C<create_archive> will | |
1654 | return false. Please use the C<error> method to find the cause of the | |
1655 | failure. | |
1656 | ||
1657 | Note that this method does not write C<on the fly> as it were; it | |
1658 | still reads all the files into memory before writing out the archive. | |
1659 | Consult the FAQ below if this is a problem. | |
1660 | ||
1661 | =cut | |
1662 | ||
1663 | sub create_archive { | |
1664 | my $class = shift; | |
1665 | ||
1666 | my $file = shift; return unless defined $file; | |
1667 | my $gzip = shift || 0; | |
1668 | my @files = @_; | |
1669 | ||
1670 | unless( @files ) { | |
1671 | return $class->_error( qq[Cowardly refusing to create empty archive!] ); | |
1672 | } | |
1673 | ||
1674 | my $tar = $class->new; | |
1675 | $tar->add_files( @files ); | |
1676 | return $tar->write( $file, $gzip ); | |
1677 | } | |
1678 | ||
642eb381 SH |
1679 | =head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] ) |
1680 | ||
1681 | Returns an iterator function that reads the tar file without loading | |
1682 | it all in memory. Each time the function is called it will return the | |
1683 | next file in the tarball. The files are returned as | |
1684 | C<Archive::Tar::File> objects. The iterator function returns the | |
941cb2bb | 1685 | empty list once it has exhausted the files contained. |
642eb381 SH |
1686 | |
1687 | The second argument can be a hash reference with options, which are | |
1688 | identical to the arguments passed to C<read()>. | |
1689 | ||
1690 | Example usage: | |
1691 | ||
1692 | my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} ); | |
1693 | ||
1694 | while( my $f = $next->() ) { | |
1695 | print $f->name, "\n"; | |
1696 | ||
1697 | $f->extract or warn "Extraction failed"; | |
e0d68803 | 1698 | |
642eb381 SH |
1699 | # .... |
1700 | } | |
1701 | ||
1702 | =cut | |
1703 | ||
1704 | ||
1705 | sub iter { | |
1706 | my $class = shift; | |
1707 | my $filename = shift or return; | |
93e94d8a | 1708 | my $compressed = shift || 0; |
642eb381 SH |
1709 | my $opts = shift || {}; |
1710 | ||
1711 | ### get a handle to read from. | |
1712 | my $handle = $class->_get_handle( | |
e0d68803 JB |
1713 | $filename, |
1714 | $compressed, | |
642eb381 SH |
1715 | READ_ONLY->( ZLIB ) |
1716 | ) or return; | |
1717 | ||
1718 | my @data; | |
1719 | return sub { | |
1720 | return shift(@data) if @data; # more than one file returned? | |
1721 | return unless $handle; # handle exhausted? | |
1722 | ||
1723 | ### read data, should only return file | |
941cb2bb CBW |
1724 | my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 }); |
1725 | @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY'; | |
642eb381 SH |
1726 | |
1727 | ### return one piece of data | |
1728 | return shift(@data) if @data; | |
e0d68803 | 1729 | |
642eb381 SH |
1730 | ### data is exhausted, free the filehandle |
1731 | undef $handle; | |
1732 | return; | |
1733 | }; | |
1734 | } | |
1735 | ||
1736 | =head2 Archive::Tar->list_archive($file, $compressed, [\@properties]) | |
39713df4 RGS |
1737 | |
1738 | Returns a list of the names of all the files in the archive. The | |
1739 | first argument can either be the name of the tar file to list or a | |
1740 | reference to an open file handle (e.g. a GLOB reference). | |
1741 | ||
1742 | If C<list_archive()> is passed an array reference as its third | |
1743 | argument it returns a list of hash references containing the requested | |
1744 | properties of each file. The following list of properties is | |
e0d68803 | 1745 | supported: full_path, name, size, mtime (last modified date), mode, |
b3200c5d SP |
1746 | uid, gid, linkname, uname, gname, devmajor, devminor, prefix. |
1747 | ||
1748 | See C<Archive::Tar::File> for details about supported properties. | |
39713df4 RGS |
1749 | |
1750 | Passing an array reference containing only one element, 'name', is | |
1751 | special cased to return a list of names rather than a list of hash | |
1752 | references. | |
1753 | ||
1754 | =cut | |
1755 | ||
1756 | sub list_archive { | |
1757 | my $class = shift; | |
1758 | my $file = shift; return unless defined $file; | |
1759 | my $gzip = shift || 0; | |
1760 | ||
1761 | my $tar = $class->new($file, $gzip); | |
1762 | return unless $tar; | |
1763 | ||
1764 | return $tar->list_files( @_ ); | |
1765 | } | |
1766 | ||
642eb381 | 1767 | =head2 Archive::Tar->extract_archive($file, $compressed) |
39713df4 RGS |
1768 | |
1769 | Extracts the contents of the tar file. The first argument can either | |
1770 | be the name of the tar file to create or a reference to an open file | |
1771 | handle (e.g. a GLOB reference). All relative paths in the tar file will | |
1772 | be created underneath the current working directory. | |
1773 | ||
1774 | C<extract_archive> will return a list of files it extracted. | |
1775 | If the archive extraction fails for any reason, C<extract_archive> | |
1776 | will return false. Please use the C<error> method to find the cause | |
1777 | of the failure. | |
1778 | ||
1779 | =cut | |
1780 | ||
1781 | sub extract_archive { | |
1782 | my $class = shift; | |
1783 | my $file = shift; return unless defined $file; | |
1784 | my $gzip = shift || 0; | |
1785 | ||
1786 | my $tar = $class->new( ) or return; | |
1787 | ||
1788 | return $tar->read( $file, $gzip, { extract => 1 } ); | |
1789 | } | |
1790 | ||
f5695358 JB |
1791 | =head2 $bool = Archive::Tar->has_io_string |
1792 | ||
1793 | Returns true if we currently have C<IO::String> support loaded. | |
1794 | ||
e0d68803 | 1795 | Either C<IO::String> or C<perlio> support is needed to support writing |
f5695358 JB |
1796 | stringified archives. Currently, C<perlio> is the preferred method, if |
1797 | available. | |
1798 | ||
1799 | See the C<GLOBAL VARIABLES> section to see how to change this preference. | |
1800 | ||
1801 | =cut | |
1802 | ||
1803 | sub has_io_string { return $HAS_IO_STRING; } | |
1804 | ||
1805 | =head2 $bool = Archive::Tar->has_perlio | |
1806 | ||
1807 | Returns true if we currently have C<perlio> support loaded. | |
1808 | ||
e0d68803 | 1809 | This requires C<perl-5.8> or higher, compiled with C<perlio> |
f5695358 | 1810 | |
e0d68803 | 1811 | Either C<IO::String> or C<perlio> support is needed to support writing |
f5695358 JB |
1812 | stringified archives. Currently, C<perlio> is the preferred method, if |
1813 | available. | |
1814 | ||
1815 | See the C<GLOBAL VARIABLES> section to see how to change this preference. | |
1816 | ||
1817 | =cut | |
1818 | ||
1819 | sub has_perlio { return $HAS_PERLIO; } | |
1820 | ||
1821 | =head2 $bool = Archive::Tar->has_zlib_support | |
1822 | ||
1823 | Returns true if C<Archive::Tar> can extract C<zlib> compressed archives | |
1824 | ||
1825 | =cut | |
1826 | ||
1827 | sub has_zlib_support { return ZLIB } | |
1828 | ||
1829 | =head2 $bool = Archive::Tar->has_bzip2_support | |
1830 | ||
1831 | Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives | |
1832 | ||
1833 | =cut | |
1834 | ||
1835 | sub has_bzip2_support { return BZIP } | |
1836 | ||
39713df4 RGS |
1837 | =head2 Archive::Tar->can_handle_compressed_files |
1838 | ||
1839 | A simple checking routine, which will return true if C<Archive::Tar> | |
642eb381 SH |
1840 | is able to uncompress compressed archives on the fly with C<IO::Zlib> |
1841 | and C<IO::Compress::Bzip2> or false if not both are installed. | |
39713df4 RGS |
1842 | |
1843 | You can use this as a shortcut to determine whether C<Archive::Tar> | |
1844 | will do what you think before passing compressed archives to its | |
1845 | C<read> method. | |
1846 | ||
1847 | =cut | |
1848 | ||
642eb381 | 1849 | sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 } |
39713df4 RGS |
1850 | |
1851 | sub no_string_support { | |
1852 | croak("You have to install IO::String to support writing archives to strings"); | |
1853 | } | |
1854 | ||
1855 | 1; | |
1856 | ||
1857 | __END__ | |
1858 | ||
1859 | =head1 GLOBAL VARIABLES | |
1860 | ||
1861 | =head2 $Archive::Tar::FOLLOW_SYMLINK | |
1862 | ||
1863 | Set this variable to C<1> to make C<Archive::Tar> effectively make a | |
1864 | copy of the file when extracting. Default is C<0>, which | |
1865 | means the symlink stays intact. Of course, you will have to pack the | |
1866 | file linked to as well. | |
1867 | ||
1868 | This option is checked when you write out the tarfile using C<write> | |
1869 | or C<create_archive>. | |
1870 | ||
1871 | This works just like C</bin/tar>'s C<-h> option. | |
1872 | ||
1873 | =head2 $Archive::Tar::CHOWN | |
1874 | ||
1875 | By default, C<Archive::Tar> will try to C<chown> your files if it is | |
1876 | able to. In some cases, this may not be desired. In that case, set | |
1877 | this variable to C<0> to disable C<chown>-ing, even if it were | |
1878 | possible. | |
1879 | ||
1880 | The default is C<1>. | |
1881 | ||
1882 | =head2 $Archive::Tar::CHMOD | |
1883 | ||
1884 | By default, C<Archive::Tar> will try to C<chmod> your files to | |
1885 | whatever mode was specified for the particular file in the archive. | |
1886 | In some cases, this may not be desired. In that case, set this | |
1887 | variable to C<0> to disable C<chmod>-ing. | |
1888 | ||
1889 | The default is C<1>. | |
1890 | ||
1c82faa7 JB |
1891 | =head2 $Archive::Tar::SAME_PERMISSIONS |
1892 | ||
1893 | When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether | |
1894 | the permissions on files from the archive are used without modification | |
1895 | of if they are filtered by removing any setid bits and applying the | |
1896 | current umask. | |
1897 | ||
1898 | The default is C<1> for the root user and C<0> for normal users. | |
1899 | ||
39713df4 RGS |
1900 | =head2 $Archive::Tar::DO_NOT_USE_PREFIX |
1901 | ||
e0d68803 | 1902 | By default, C<Archive::Tar> will try to put paths that are over |
f38c1908 | 1903 | 100 characters in the C<prefix> field of your tar header, as |
e0d68803 JB |
1904 | defined per POSIX-standard. However, some (older) tar programs |
1905 | do not implement this spec. To retain compatibility with these older | |
1906 | or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX> | |
1907 | variable to a true value, and C<Archive::Tar> will use an alternate | |
1908 | way of dealing with paths over 100 characters by using the | |
f38c1908 SP |
1909 | C<GNU Extended Header> feature. |
1910 | ||
1911 | Note that clients who do not support the C<GNU Extended Header> | |
1912 | feature will not be able to read these archives. Such clients include | |
1913 | tars on C<Solaris>, C<Irix> and C<AIX>. | |
39713df4 RGS |
1914 | |
1915 | The default is C<0>. | |
1916 | ||
1917 | =head2 $Archive::Tar::DEBUG | |
1918 | ||
1919 | Set this variable to C<1> to always get the C<Carp::longmess> output | |
1920 | of the warnings, instead of the regular C<carp>. This is the same | |
1921 | message you would get by doing: | |
1922 | ||
1923 | $tar->error(1); | |
1924 | ||
1925 | Defaults to C<0>. | |
1926 | ||
1927 | =head2 $Archive::Tar::WARN | |
1928 | ||
1929 | Set this variable to C<0> if you do not want any warnings printed. | |
1930 | Personally I recommend against doing this, but people asked for the | |
1931 | option. Also, be advised that this is of course not threadsafe. | |
1932 | ||
1933 | Defaults to C<1>. | |
1934 | ||
1935 | =head2 $Archive::Tar::error | |
1936 | ||
1937 | Holds the last reported error. Kept for historical reasons, but its | |
1938 | use is very much discouraged. Use the C<error()> method instead: | |
1939 | ||
1940 | warn $tar->error unless $tar->extract; | |
1941 | ||
941cb2bb CBW |
1942 | Note that in older versions of this module, the C<error()> method |
1943 | would return an effectively global value even when called an instance | |
1944 | method as above. This has since been fixed, and multiple instances of | |
1945 | C<Archive::Tar> now have separate error strings. | |
1946 | ||
178aef9a RGS |
1947 | =head2 $Archive::Tar::INSECURE_EXTRACT_MODE |
1948 | ||
1949 | This variable indicates whether C<Archive::Tar> should allow | |
1950 | files to be extracted outside their current working directory. | |
1951 | ||
1952 | Allowing this could have security implications, as a malicious | |
1953 | tar archive could alter or replace any file the extracting user | |
e0d68803 JB |
1954 | has permissions to. Therefor, the default is to not allow |
1955 | insecure extractions. | |
178aef9a | 1956 | |
e0d68803 JB |
1957 | If you trust the archive, or have other reasons to allow the |
1958 | archive to write files outside your current working directory, | |
178aef9a RGS |
1959 | set this variable to C<true>. |
1960 | ||
1961 | Note that this is a backwards incompatible change from version | |
1962 | C<1.36> and before. | |
1963 | ||
39713df4 RGS |
1964 | =head2 $Archive::Tar::HAS_PERLIO |
1965 | ||
e0d68803 | 1966 | This variable holds a boolean indicating if we currently have |
39713df4 | 1967 | C<perlio> support loaded. This will be enabled for any perl |
e0d68803 | 1968 | greater than C<5.8> compiled with C<perlio>. |
39713df4 RGS |
1969 | |
1970 | If you feel strongly about disabling it, set this variable to | |
1971 | C<false>. Note that you will then need C<IO::String> installed | |
1972 | to support writing stringified archives. | |
1973 | ||
1974 | Don't change this variable unless you B<really> know what you're | |
1975 | doing. | |
1976 | ||
1977 | =head2 $Archive::Tar::HAS_IO_STRING | |
1978 | ||
e0d68803 | 1979 | This variable holds a boolean indicating if we currently have |
39713df4 RGS |
1980 | C<IO::String> support loaded. This will be enabled for any perl |
1981 | that has a loadable C<IO::String> module. | |
1982 | ||
1983 | If you feel strongly about disabling it, set this variable to | |
1984 | C<false>. Note that you will then need C<perlio> support from | |
1985 | your perl to be able to write stringified archives. | |
1986 | ||
1987 | Don't change this variable unless you B<really> know what you're | |
1988 | doing. | |
1989 | ||
d33cd7cf CBW |
1990 | =head2 $Archive::Tar::ZERO_PAD_NUMBERS |
1991 | ||
1992 | This variable holds a boolean indicating if we will create | |
93e94d8a | 1993 | zero padded numbers for C<size>, C<mtime> and C<checksum>. |
d33cd7cf CBW |
1994 | The default is C<0>, indicating that we will create space padded |
1995 | numbers. Added for compatibility with C<busybox> implementations. | |
1996 | ||
39713df4 RGS |
1997 | =head1 FAQ |
1998 | ||
1999 | =over 4 | |
2000 | ||
2001 | =item What's the minimum perl version required to run Archive::Tar? | |
2002 | ||
2003 | You will need perl version 5.005_03 or newer. | |
2004 | ||
2005 | =item Isn't Archive::Tar slow? | |
2006 | ||
2007 | Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar> | |
2008 | However, it's very portable. If speed is an issue, consider using | |
2009 | C</bin/tar> instead. | |
2010 | ||
2011 | =item Isn't Archive::Tar heavier on memory than /bin/tar? | |
2012 | ||
2013 | Yes it is, see previous answer. Since C<Compress::Zlib> and therefore | |
2014 | C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little | |
2015 | choice but to read the archive into memory. | |
2016 | This is ok if you want to do in-memory manipulation of the archive. | |
642eb381 | 2017 | |
39713df4 RGS |
2018 | If you just want to extract, use the C<extract_archive> class method |
2019 | instead. It will optimize and write to disk immediately. | |
2020 | ||
642eb381 SH |
2021 | Another option is to use the C<iter> class method to iterate over |
2022 | the files in the tarball without reading them all in memory at once. | |
2023 | ||
2024 | =item Can you lazy-load data instead? | |
39713df4 | 2025 | |
642eb381 SH |
2026 | In some cases, yes. You can use the C<iter> class method to iterate |
2027 | over the files in the tarball without reading them all in memory at once. | |
39713df4 RGS |
2028 | |
2029 | =item How much memory will an X kb tar file need? | |
2030 | ||
2031 | Probably more than X kb, since it will all be read into memory. If | |
2032 | this is a problem, and you don't need to do in memory manipulation | |
e0d68803 | 2033 | of the archive, consider using the C<iter> class method, or C</bin/tar> |
642eb381 | 2034 | instead. |
39713df4 RGS |
2035 | |
2036 | =item What do you do with unsupported filetypes in an archive? | |
2037 | ||
2038 | C<Unix> has a few filetypes that aren't supported on other platforms, | |
2039 | like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just | |
2040 | try to make a copy of the original file, rather than throwing an error. | |
2041 | ||
2042 | This does require you to read the entire archive in to memory first, | |
2043 | since otherwise we wouldn't know what data to fill the copy with. | |
e0d68803 JB |
2044 | (This means that you cannot use the class methods, including C<iter> |
2045 | on archives that have incompatible filetypes and still expect things | |
642eb381 | 2046 | to work). |
39713df4 RGS |
2047 | |
2048 | For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that | |
2049 | the extraction of this particular item didn't work. | |
2050 | ||
f38c1908 SP |
2051 | =item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly! |
2052 | ||
2053 | By default, C<Archive::Tar> is in a completely POSIX-compatible | |
2054 | mode, which uses the POSIX-specification of C<tar> to store files. | |
eadbb00b | 2055 | For paths greater than 100 characters, this is done using the |
f38c1908 SP |
2056 | C<POSIX header prefix>. Non-POSIX-compatible clients may not support |
2057 | this part of the specification, and may only support the C<GNU Extended | |
2058 | Header> functionality. To facilitate those clients, you can set the | |
e0d68803 | 2059 | C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the |
f38c1908 SP |
2060 | C<GLOBAL VARIABLES> section for details on this variable. |
2061 | ||
c3745331 RGS |
2062 | Note that GNU tar earlier than version 1.14 does not cope well with |
2063 | the C<POSIX header prefix>. If you use such a version, consider setting | |
2064 | the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. | |
2065 | ||
b30bcf62 RGS |
2066 | =item How do I extract only files that have property X from an archive? |
2067 | ||
2068 | Sometimes, you might not wish to extract a complete archive, just | |
2069 | the files that are relevant to you, based on some criteria. | |
2070 | ||
2071 | You can do this by filtering a list of C<Archive::Tar::File> objects | |
2072 | based on your criteria. For example, to extract only files that have | |
2073 | the string C<foo> in their title, you would use: | |
2074 | ||
e0d68803 | 2075 | $tar->extract( |
b30bcf62 | 2076 | grep { $_->full_path =~ /foo/ } $tar->get_files |
e0d68803 | 2077 | ); |
b30bcf62 RGS |
2078 | |
2079 | This way, you can filter on any attribute of the files in the archive. | |
2080 | Consult the C<Archive::Tar::File> documentation on how to use these | |
2081 | objects. | |
2082 | ||
81a5970e RGS |
2083 | =item How do I access .tar.Z files? |
2084 | ||
2085 | The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via | |
2086 | the C<IO::Zlib> module) to access tar files that have been compressed | |
2087 | with C<gzip>. Unfortunately tar files compressed with the Unix C<compress> | |
2088 | utility cannot be read by C<Compress::Zlib> and so cannot be directly | |
2089 | accesses by C<Archive::Tar>. | |
2090 | ||
2091 | If the C<uncompress> or C<gunzip> programs are available, you can use | |
2092 | one of these workarounds to read C<.tar.Z> files from C<Archive::Tar> | |
2093 | ||
2094 | Firstly with C<uncompress> | |
2095 | ||
2096 | use Archive::Tar; | |
2097 | ||
2098 | open F, "uncompress -c $filename |"; | |
2099 | my $tar = Archive::Tar->new(*F); | |
2100 | ... | |
2101 | ||
2102 | and this with C<gunzip> | |
2103 | ||
2104 | use Archive::Tar; | |
2105 | ||
2106 | open F, "gunzip -c $filename |"; | |
2107 | my $tar = Archive::Tar->new(*F); | |
2108 | ... | |
2109 | ||
2110 | Similarly, if the C<compress> program is available, you can use this to | |
2111 | write a C<.tar.Z> file | |
2112 | ||
2113 | use Archive::Tar; | |
2114 | use IO::File; | |
2115 | ||
2116 | my $fh = new IO::File "| compress -c >$filename"; | |
2117 | my $tar = Archive::Tar->new(); | |
2118 | ... | |
2119 | $tar->write($fh); | |
2120 | $fh->close ; | |
2121 | ||
01d11a1c SP |
2122 | =item How do I handle Unicode strings? |
2123 | ||
2124 | C<Archive::Tar> uses byte semantics for any files it reads from or writes | |
2125 | to disk. This is not a problem if you only deal with files and never | |
2126 | look at their content or work solely with byte strings. But if you use | |
2127 | Unicode strings with character semantics, some additional steps need | |
2128 | to be taken. | |
2129 | ||
2130 | For example, if you add a Unicode string like | |
2131 | ||
2132 | # Problem | |
2133 | $tar->add_data('file.txt', "Euro: \x{20AC}"); | |
2134 | ||
2135 | then there will be a problem later when the tarfile gets written out | |
2136 | to disk via C<$tar->write()>: | |
2137 | ||
2138 | Wide character in print at .../Archive/Tar.pm line 1014. | |
2139 | ||
2140 | The data was added as a Unicode string and when writing it out to disk, | |
2141 | the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl | |
2142 | tried to convert the string to ISO-8859 and failed. The written file | |
2143 | now contains garbage. | |
2144 | ||
2145 | For this reason, Unicode strings need to be converted to UTF-8-encoded | |
2146 | bytestrings before they are handed off to C<add_data()>: | |
2147 | ||
2148 | use Encode; | |
2149 | my $data = "Accented character: \x{20AC}"; | |
2150 | $data = encode('utf8', $data); | |
2151 | ||
2152 | $tar->add_data('file.txt', $data); | |
2153 | ||
e0d68803 | 2154 | A opposite problem occurs if you extract a UTF8-encoded file from a |
01d11a1c SP |
2155 | tarball. Using C<get_content()> on the C<Archive::Tar::File> object |
2156 | will return its content as a bytestring, not as a Unicode string. | |
2157 | ||
2158 | If you want it to be a Unicode string (because you want character | |
2159 | semantics with operations like regular expression matching), you need | |
e0d68803 | 2160 | to decode the UTF8-encoded content and have Perl convert it into |
01d11a1c SP |
2161 | a Unicode string: |
2162 | ||
2163 | use Encode; | |
2164 | my $data = $tar->get_content(); | |
e0d68803 | 2165 | |
01d11a1c SP |
2166 | # Make it a Unicode string |
2167 | $data = decode('utf8', $data); | |
2168 | ||
e0d68803 | 2169 | There is no easy way to provide this functionality in C<Archive::Tar>, |
01d11a1c SP |
2170 | because a tarball can contain many files, and each of which could be |
2171 | encoded in a different way. | |
81a5970e | 2172 | |
39713df4 RGS |
2173 | =back |
2174 | ||
f475b4a2 JB |
2175 | =head1 CAVEATS |
2176 | ||
93e94d8a | 2177 | The AIX tar does not fill all unused space in the tar archive with 0x00. |
f475b4a2 JB |
2178 | This sometimes leads to warning messages from C<Archive::Tar>. |
2179 | ||
2180 | Invalid header block at offset nnn | |
2181 | ||
2182 | A fix for that problem is scheduled to be released in the following levels | |
2183 | of AIX, all of which should be coming out in the 4th quarter of 2009: | |
2184 | ||
2185 | AIX 5.3 TL7 SP10 | |
2186 | AIX 5.3 TL8 SP8 | |
2187 | AIX 5.3 TL9 SP5 | |
2188 | AIX 5.3 TL10 SP2 | |
93e94d8a | 2189 | |
f475b4a2 JB |
2190 | AIX 6.1 TL0 SP11 |
2191 | AIX 6.1 TL1 SP7 | |
2192 | AIX 6.1 TL2 SP6 | |
2193 | AIX 6.1 TL3 SP3 | |
2194 | ||
93e94d8a CBW |
2195 | The IBM APAR number for this problem is IZ50240 (Reported component ID: |
2196 | 5765G0300 / AIX 5.3). It is possible to get an ifix for that problem. | |
f475b4a2 JB |
2197 | If you need an ifix please contact your local IBM AIX support. |
2198 | ||
39713df4 RGS |
2199 | =head1 TODO |
2200 | ||
2201 | =over 4 | |
2202 | ||
2203 | =item Check if passed in handles are open for read/write | |
2204 | ||
2205 | Currently I don't know of any portable pure perl way to do this. | |
2206 | Suggestions welcome. | |
2207 | ||
b3200c5d SP |
2208 | =item Allow archives to be passed in as string |
2209 | ||
2210 | Currently, we only allow opened filehandles or filenames, but | |
2211 | not strings. The internals would need some reworking to facilitate | |
2212 | stringified archives. | |
2213 | ||
2214 | =item Facilitate processing an opened filehandle of a compressed archive | |
2215 | ||
2216 | Currently, we only support this if the filehandle is an IO::Zlib object. | |
2217 | Environments, like apache, will present you with an opened filehandle | |
2218 | to an uploaded file, which might be a compressed archive. | |
2219 | ||
39713df4 RGS |
2220 | =back |
2221 | ||
f38c1908 SP |
2222 | =head1 SEE ALSO |
2223 | ||
2224 | =over 4 | |
2225 | ||
2226 | =item The GNU tar specification | |
2227 | ||
2228 | C<http://www.gnu.org/software/tar/manual/tar.html> | |
2229 | ||
eadbb00b | 2230 | =item The PAX format specification |
f38c1908 | 2231 | |
eadbb00b | 2232 | The specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html> |
f38c1908 SP |
2233 | |
2234 | =item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html> | |
2235 | ||
2236 | =item GNU tar intends to switch to POSIX compatibility | |
2237 | ||
2238 | GNU Tar authors have expressed their intention to become completely | |
2239 | POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html> | |
2240 | ||
2241 | =item A Comparison between various tar implementations | |
2242 | ||
2243 | Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs> | |
2244 | ||
2245 | =back | |
2246 | ||
39713df4 RGS |
2247 | =head1 AUTHOR |
2248 | ||
c3745331 RGS |
2249 | This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. |
2250 | ||
2251 | Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>. | |
39713df4 RGS |
2252 | |
2253 | =head1 ACKNOWLEDGEMENTS | |
2254 | ||
f475b4a2 JB |
2255 | Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas, |
2256 | Rainer Tammer and especially Andrew Savige for their help and suggestions. | |
39713df4 RGS |
2257 | |
2258 | =head1 COPYRIGHT | |
2259 | ||
f475b4a2 | 2260 | This module is copyright (c) 2002 - 2009 Jos Boumans |
c3745331 | 2261 | E<lt>kane@cpan.orgE<gt>. All rights reserved. |
39713df4 | 2262 | |
e0d68803 | 2263 | This library is free software; you may redistribute and/or modify |
c3745331 | 2264 | it under the same terms as Perl itself. |
39713df4 RGS |
2265 | |
2266 | =cut |