This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
compression modules update to version 2.005
[perl5.git] / ext / IO_Compress_Zlib / lib / IO / Uncompress / Gunzip.pm
CommitLineData
642e522c
RGS
1
2package IO::Uncompress::Gunzip ;
3
4require 5.004 ;
5
6# for RFC1952
7
8use strict ;
9use warnings;
a02d0f6f 10use bytes;
642e522c 11
93d092e2 12use IO::Uncompress::RawInflate 2.005 ;
1a6a8453 13
93d092e2
PM
14use Compress::Raw::Zlib 2.005 qw( crc32 ) ;
15use IO::Compress::Base::Common 2.005 qw(:Status createSelfTiedObject);
16use IO::Compress::Gzip::Constants 2.005 ;
17use IO::Compress::Zlib::Extra 2.005 ;
1a6a8453 18
642e522c
RGS
require Exporter ;

# Package globals: module version, inheritance chain, export lists and the
# error variable handed to createSelfTiedObject by new() and gunzip().
our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $GunzipError);

$VERSION     = '2.005';
$GunzipError = '';

@ISA       = qw( Exporter IO::Uncompress::RawInflate );
@EXPORT_OK = qw( $GunzipError gunzip );

# Seed the export tags from %IO::Uncompress::RawInflate::DEFLATE_CONSTANTS,
# then add this module's own symbols to the ':all' tag and make every
# symbol in that tag exportable on request.
%EXPORT_TAGS = %IO::Uncompress::RawInflate::DEFLATE_CONSTANTS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');
642e522c 32
# Constructor.  Clears $GunzipError, builds a self-tied object blessed into
# $class and passes the remaining arguments on to _create, whose result is
# returned to the caller.
sub new
{
    my $class = shift ;

    # Each construction starts with an empty error string.
    $GunzipError = '';

    return createSelfTiedObject($class, \$GunzipError)
               ->_create(undef, 0, @_);
}
41
# Functional ("one-shot") interface.  Creates a self-tied object that is
# not blessed into a caller-supplied class, wires it to $GunzipError and
# delegates all arguments to _inf, returning its result.
sub gunzip
{
    return createSelfTiedObject(undef, \$GunzipError)->_inf(@_) ;
}
47
# Returns the option specification that this class adds to the common
# parameter set: a single 'ParseExtra' entry declared as Parse_boolean.
sub getExtraParams
{
    use IO::Compress::Base::Common 2.005 qw(:Parse);

    my @parse_extra_spec = (1, 1, Parse_boolean, 0) ;

    return ( 'ParseExtra' => \@parse_extra_spec ) ;
}
53
# Parameter hook.  $got is the parsed-options object; the CRC32 option is
# switched on unconditionally.  Always returns 1.
sub ckParams
{
    my ($self, $got) = @_ ;

    # gunzip always needs crc32
    $got->value('CRC32' => 1);

    return 1;
}
64
# Read GZIP_ID_SIZE bytes from the input and check them against the gzip
# magic number.  The bytes read are always stored in HeaderPending.  On
# success the stream Type is set to 'rfc1952' and the magic bytes are
# returned; otherwise the result of HeaderError is returned.
sub ckMagic
{
    my ($self) = @_ ;

    my $id_bytes ;
    $self->smartReadExact(\$id_bytes, GZIP_ID_SIZE);

    # Record what was consumed before deciding whether it is acceptable.
    *$self->{HeaderPending} = $id_bytes ;

    if (length($id_bytes) != GZIP_ID_SIZE) {
        my $message = "Minimum header size is " .
                      GZIP_MIN_HEADER_SIZE . " bytes" ;
        return $self->HeaderError($message) ;
    }

    if (! isGzipMagic($id_bytes)) {
        return $self->HeaderError("Bad Magic") ;
    }

    *$self->{Type} = 'rfc1952';

    return $id_bytes ;
}
85
# Header hook: given the magic bytes already consumed, hand over to
# _readGzipHeader and return whatever it returns.
sub readHeader
{
    my ($self, $magic) = @_ ;

    return $self->_readGzipHeader($magic);
}
93
# Decode the gzip trailer (two little-endian 32-bit values: CRC32 then
# ISIZE) and record both in the Info hash.  When the Strict option is set,
# each value is compared with the one tracked while uncompressing and a
# TrailerError result is returned on the first mismatch.  Returns STATUS_OK
# otherwise.
sub chkTrailer
{
    my ($self, $trailer) = @_ ;

    my ($crc_field, $isize_field) = unpack("V V", $trailer) ;
    *$self->{Info}{CRC32} = $crc_field ;
    *$self->{Info}{ISIZE} = $isize_field ;

    # Without Strict the trailer values are recorded but not verified.
    return STATUS_OK
        unless *$self->{Strict} ;

    return $self->TrailerError("CRC mismatch")
        unless $crc_field == *$self->{Uncomp}->crc32() ;

    my $expected_isize = *$self->{UnCompSize}->get32bit();
    return $self->TrailerError("ISIZE mismatch. Got $isize_field"
                              . ", expected $expected_isize")
        unless $isize_field == $expected_isize ;

    return STATUS_OK;
}
642e522c 116
1a6a8453
PM
# Plain function (not a method): returns 0 when $buffer is shorter than
# GZIP_ID_SIZE, otherwise a true value only when its first two bytes equal
# GZIP_ID1 and GZIP_ID2.
sub isGzipMagic
{
    my ($candidate) = @_ ;

    return 0
        if length($candidate) < GZIP_ID_SIZE ;

    my ($first_byte, $second_byte) = unpack("C C", $candidate) ;

    return $first_byte == GZIP_ID1 && $second_byte == GZIP_ID2 ;
}
124
# Read a complete gzip header, magic bytes included.  The magic bytes are
# read and validated here, then the rest is parsed by _readGzipHeader.
# If that parse yields undef, the Transparent entry is removed from the
# object before the (undefined) result is returned.
sub _readFullGzipHeader($)
{
    my $self = shift ;
    my $id_bytes = '' ;

    $self->smartReadExact(\$id_bytes, GZIP_ID_SIZE);
    *$self->{HeaderPending} = $id_bytes ;

    if (length($id_bytes) != GZIP_ID_SIZE) {
        return $self->HeaderError("Minimum header size is " .
                                  GZIP_MIN_HEADER_SIZE . " bytes") ;
    }

    if (! isGzipMagic($id_bytes)) {
        return $self->HeaderError("Bad Magic") ;
    }

    my $header = $self->_readGzipHeader($id_bytes);

    delete *$self->{Transparent}
        unless defined $header ;

    return $header ;
}
146
# Parse the remainder of a gzip (RFC 1952) member header.
#
# Invoked as a method with ($self, $magic), where $magic holds the
# identification bytes the caller has already consumed.  No prototype is
# declared because prototypes are not applied to method calls.
#
# The fixed-size part of the header is read first, followed by whichever
# of the optional FEXTRA, FNAME, FCOMMENT and FHCRC fields the FLG byte
# announces.  Every header byte consumed is accumulated in $keep.
#
# Returns a hash reference describing the header on success.  On failure
# returns the result of HeaderError or TruncatedHeader.
sub _readGzipHeader
{
    my ($self, $magic) = @_ ;
    my ($HeaderCRC) ;
    my ($buffer) = '' ;

    $self->smartReadExact(\$buffer, GZIP_MIN_HEADER_SIZE - GZIP_ID_SIZE)
        or return $self->HeaderError("Minimum header size is " .
                                     GZIP_MIN_HEADER_SIZE . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    # now split out the various parts
    my ($cm, $flag, $mtime, $xfl, $os) = unpack("C C V C C", $buffer) ;

    $cm == GZIP_CM_DEFLATED
        or return $self->HeaderError("Not Deflate (CM is $cm)") ;

    # check for use of reserved bits
    return $self->HeaderError("Use of Reserved Bits in FLG field.")
        if $flag & GZIP_FLG_RESERVED ;

    # FEXTRA: a 16-bit little-endian length followed by that many bytes.
    my $EXTRA ;
    my @EXTRA = () ;
    if ($flag & GZIP_FLG_FEXTRA) {
        $EXTRA = "" ;
        $self->smartReadExact(\$buffer, GZIP_FEXTRA_HEADER_SIZE)
            or return $self->TruncatedHeader("FEXTRA Length") ;

        my ($XLEN) = unpack("v", $buffer) ;
        $self->smartReadExact(\$EXTRA, $XLEN)
            or return $self->TruncatedHeader("FEXTRA Body");
        $keep .= $buffer . $EXTRA ;

        # Sub-field parsing is only attempted for a non-empty extra field
        # and only when the ParseExtra option is on.
        if ($XLEN && *$self->{'ParseExtra'}) {
            my $bad = IO::Compress::Zlib::Extra::parseRawExtra($EXTRA,
                                                               \@EXTRA, 1, 1);
            return $self->HeaderError($bad)
                if defined $bad;
        }
    }

    # FNAME: NUL-terminated string, read one byte at a time.
    my $origname ;
    if ($flag & GZIP_FLG_FNAME) {
        $origname = "" ;
        while (1) {
            $self->smartReadExact(\$buffer, 1)
                or return $self->TruncatedHeader("FNAME");
            last if $buffer eq GZIP_NULL_BYTE ;
            $origname .= $buffer
        }
        $keep .= $origname . GZIP_NULL_BYTE ;

        return $self->HeaderError("Non ISO 8859-1 Character found in Name")
            if *$self->{Strict} && $origname =~ /$GZIP_FNAME_INVALID_CHAR_RE/o ;
    }

    # FCOMMENT: NUL-terminated string, read one byte at a time.
    my $comment ;
    if ($flag & GZIP_FLG_FCOMMENT) {
        $comment = "";
        while (1) {
            $self->smartReadExact(\$buffer, 1)
                or return $self->TruncatedHeader("FCOMMENT");
            last if $buffer eq GZIP_NULL_BYTE ;
            $comment .= $buffer
        }
        $keep .= $comment . GZIP_NULL_BYTE ;

        return $self->HeaderError("Non ISO 8859-1 Character found in Comment")
            if *$self->{Strict} && $comment =~ /$GZIP_FCOMMENT_INVALID_CHAR_RE/o ;
    }

    if ($flag & GZIP_FLG_FHCRC) {
        $self->smartReadExact(\$buffer, GZIP_FHCRC_SIZE)
            or return $self->TruncatedHeader("FHCRC");

        $HeaderCRC = unpack("v", $buffer) ;

        # RFC 1952: the header CRC16 is the two least-significant bytes of
        # the CRC32 of every header byte that precedes this field, so the
        # mask has to keep 16 bits (an 8-bit mask rejects valid headers).
        # NOTE(review): confirm the companion writer emits the same 16-bit
        # value, otherwise Strict-mode round-trips will fail.
        my $crc16 = crc32($keep) & 0xFFFF ;

        return $self->HeaderError("CRC16 mismatch.")
            if *$self->{Strict} && $crc16 != $HeaderCRC;

        $keep .= $buffer ;
    }

    # Assume compression method is deflated for xfl tests
    #if ($xfl) {
    #}

    *$self->{Type} = 'rfc1952';

    return {
        'Type'              => 'rfc1952',
        'FingerprintLength' => 2,
        'HeaderLength'      => length $keep,
        'TrailerLength'     => GZIP_TRAILER_SIZE,
        'Header'            => $keep,
        'isMinimalHeader'   => $keep eq GZIP_MINIMUM_HEADER ? 1 : 0,

        'MethodID'      => $cm,
        'MethodName'    => $cm == GZIP_CM_DEFLATED ? "Deflated" : "Unknown" ,
        'TextFlag'      => $flag & GZIP_FLG_FTEXT ? 1 : 0,
        'HeaderCRCFlag' => $flag & GZIP_FLG_FHCRC ? 1 : 0,
        'NameFlag'      => $flag & GZIP_FLG_FNAME ? 1 : 0,
        'CommentFlag'   => $flag & GZIP_FLG_FCOMMENT ? 1 : 0,
        'ExtraFlag'     => $flag & GZIP_FLG_FEXTRA ? 1 : 0,
        'Name'          => $origname,
        'Comment'       => $comment,
        'Time'          => $mtime,
        'OsID'          => $os,
        'OsName'        => defined $GZIP_OS_Names{$os}
                                 ? $GZIP_OS_Names{$os} : "Unknown",
        'HeaderCRC'     => $HeaderCRC,
        'Flags'         => $flag,
        'ExtraFlags'    => $xfl,
        'ExtraFieldRaw' => $EXTRA,
        'ExtraField'    => [ @EXTRA ],

        #'CompSize'=> $compsize,
        #'CRC32'=> $CRC32,
        #'OrigSize'=> $ISIZE,
    } ;
}
272
273
1a6a8453 2741;
642e522c 275
642e522c
RGS
276__END__
277
278
279=head1 NAME
280
a02d0f6f 281
cb7abd7f
PM
282
283IO::Uncompress::Gunzip - Read RFC 1952 files/buffers
284
642e522c 285
a02d0f6f 286
642e522c
RGS
287=head1 SYNOPSIS
288
289 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
290
291 my $status = gunzip $input => $output [,OPTS]
292 or die "gunzip failed: $GunzipError\n";
293
294 my $z = new IO::Uncompress::Gunzip $input [OPTS]
295 or die "gunzip failed: $GunzipError\n";
296
297 $status = $z->read($buffer)
298 $status = $z->read($buffer, $length)
299 $status = $z->read($buffer, $length, $offset)
300 $line = $z->getline()
301 $char = $z->getc()
302 $char = $z->ungetc()
a02d0f6f
RGS
303 $char = $z->opened()
304
642e522c 305 $status = $z->inflateSync()
a02d0f6f 306
e7d45986
PM
307 $data = $z->trailingData()
308 $status = $z->nextStream()
642e522c
RGS
309 $data = $z->getHeaderInfo()
310 $z->tell()
311 $z->seek($position, $whence)
312 $z->binmode()
313 $z->fileno()
314 $z->eof()
315 $z->close()
316
317 $GunzipError ;
318
319 # IO::File mode
320
321 <$z>
322 read($z, $buffer);
323 read($z, $buffer, $length);
324 read($z, $buffer, $length, $offset);
325 tell($z)
326 seek($z, $position, $whence)
327 binmode($z)
328 fileno($z)
329 eof($z)
330 close($z)
331
332
333=head1 DESCRIPTION
334
335
336
1a6a8453 337This module provides a Perl interface that allows the reading of
642e522c
RGS
338files/buffers that conform to RFC 1952.
339
1a6a8453 340For writing RFC 1952 files/buffers, see the companion module IO::Compress::Gzip.
642e522c
RGS
341
342
343
cb7abd7f
PM
344
345
642e522c
RGS
346=head1 Functional Interface
347
1a6a8453
PM
348A top-level function, C<gunzip>, is provided to carry out
349"one-shot" uncompression between buffers and/or files. For finer
350control over the uncompression process, see the L</"OO Interface">
351section.
642e522c
RGS
352
353 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
354
355 gunzip $input => $output [,OPTS]
356 or die "gunzip failed: $GunzipError\n";
357
1a6a8453 358
642e522c
RGS
359
360The functional interface needs Perl5.005 or better.
361
362
363=head2 gunzip $input => $output [, OPTS]
364
1a6a8453
PM
365
366C<gunzip> expects at least two parameters, C<$input> and C<$output>.
642e522c
RGS
367
368=head3 The C<$input> parameter
369
370The parameter, C<$input>, is used to define the source of
371the compressed data.
372
373It can take one of the following forms:
374
375=over 5
376
377=item A filename
378
379If the C<$input> parameter is a simple scalar, it is assumed to be a
380filename. This file will be opened for reading and the input data
381will be read from it.
382
383=item A filehandle
384
385If the C<$input> parameter is a filehandle, the input data will be
386read from it.
387The string '-' can be used as an alias for standard input.
388
389=item A scalar reference
390
391If C<$input> is a scalar reference, the input data will be read
392from C<$$input>.
393
394=item An array reference
395
1a6a8453
PM
396If C<$input> is an array reference, each element in the array must be a
397filename.
398
399The input data will be read from each file in turn.
400
642e522c 401The complete array will be walked to ensure that it only
1a6a8453
PM
402contains valid filenames before any data is uncompressed.
403
404
642e522c
RGS
405
406=item An Input FileGlob string
407
408If C<$input> is a string that is delimited by the characters "<" and ">"
409C<gunzip> will assume that it is an I<input fileglob string>. The
410input is the list of files that match the fileglob.
411
412If the fileglob does not match any files ...
413
414See L<File::GlobMapper|File::GlobMapper> for more details.
415
416
417=back
418
419If the C<$input> parameter is any other type, C<undef> will be returned.
420
421
422
423=head3 The C<$output> parameter
424
425The parameter C<$output> is used to control the destination of the
426uncompressed data. This parameter can take one of these forms.
427
428=over 5
429
430=item A filename
431
1a6a8453
PM
432If the C<$output> parameter is a simple scalar, it is assumed to be a
433filename. This file will be opened for writing and the uncompressed
434data will be written to it.
642e522c
RGS
435
436=item A filehandle
437
1a6a8453
PM
438If the C<$output> parameter is a filehandle, the uncompressed data
439will be written to it.
642e522c
RGS
440The string '-' can be used as an alias for standard output.
441
442
443=item A scalar reference
444
1a6a8453
PM
445If C<$output> is a scalar reference, the uncompressed data will be
446stored in C<$$output>.
642e522c
RGS
447
448
642e522c
RGS
449
450=item An Array Reference
451
1a6a8453
PM
452If C<$output> is an array reference, the uncompressed data will be
453pushed onto the array.
642e522c
RGS
454
455=item An Output FileGlob
456
457If C<$output> is a string that is delimited by the characters "<" and ">"
458C<gunzip> will assume that it is an I<output fileglob string>. The
459output is the list of files that match the fileglob.
460
461When C<$output> is a fileglob string, C<$input> must also be a fileglob
462string. Anything else is an error.
463
464=back
465
466If the C<$output> parameter is any other type, C<undef> will be returned.
467
642e522c 468
642e522c
RGS
469
470=head2 Notes
471
c70c1701
PM
472
473When C<$input> maps to multiple compressed files/buffers and C<$output> is
474a single file/buffer, after uncompression C<$output> will contain a
475concatenation of all the uncompressed data from each of the input
476files/buffers.
477
478
642e522c
RGS
479
480
481
482=head2 Optional Parameters
483
484Unless specified below, the optional parameters for C<gunzip>,
485C<OPTS>, are the same as those used with the OO interface defined in the
486L</"Constructor Options"> section below.
487
488=over 5
489
e7d45986 490=item C<< AutoClose => 0|1 >>
642e522c 491
1a6a8453
PM
492This option applies to any input or output data streams to
493C<gunzip> that are filehandles.
642e522c
RGS
494
495If C<AutoClose> is specified, and the value is true, it will result in all
496input and/or output filehandles being closed once C<gunzip> has
497completed.
498
499This parameter defaults to 0.
500
501
e7d45986 502=item C<< BinModeOut => 0|1 >>
1a6a8453
PM
503
504When writing to a file or filehandle, set C<binmode> before writing to the
505file.
506
507Defaults to 0.
508
509
510
511
512
e7d45986 513=item C<< Append => 0|1 >>
642e522c
RGS
514
515TODO
516
e7d45986 517=item C<< MultiStream => 0|1 >>
1a6a8453 518
258133d1 519
e7d45986
PM
520If the input file/buffer contains multiple compressed data streams, this
521option will uncompress the whole lot as a single data stream.
1a6a8453 522
e7d45986 523Defaults to 0.
1a6a8453 524
642e522c
RGS
525
526
258133d1
PM
527
528
529=item C<< TrailingData => $scalar >>
530
531Returns the data, if any, that is present immediately after the compressed
532data stream once uncompression is complete.
533
534This option can be used when there is useful information immediately
535following the compressed data stream, and you don't know the length of the
536compressed data stream.
537
538If the input is a buffer, C<trailingData> will return everything from the
539end of the compressed data stream to the end of the buffer.
540
541If the input is a filehandle, C<trailingData> will return the data that is
542left in the filehandle input buffer once the end of the compressed data
543stream has been reached. You can then use the filehandle to read the rest
544of the input file.
545
546Don't bother using C<trailingData> if the input is a filename.
547
548
549
550If you know the length of the compressed data stream before you start
551uncompressing, you can avoid having to use C<trailingData> by setting the
552C<InputLength> option.
553
554
555
642e522c
RGS
556=back
557
558
559
560
561=head2 Examples
562
563To read the contents of the file C<file1.txt.gz> and write the
564uncompressed data to the file C<file1.txt>.
565
566 use strict ;
567 use warnings ;
568 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
569
570 my $input = "file1.txt.gz";
571 my $output = "file1.txt";
572 gunzip $input => $output
573 or die "gunzip failed: $GunzipError\n";
574
575
576To read from an existing Perl filehandle, C<$input>, and write the
577uncompressed data to a buffer, C<$buffer>.
578
579 use strict ;
580 use warnings ;
581 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
582 use IO::File ;
583
584 my $input = new IO::File "<file1.txt.gz"
585 or die "Cannot open 'file1.txt.gz': $!\n" ;
586 my $buffer ;
587 gunzip $input => \$buffer
588 or die "gunzip failed: $GunzipError\n";
589
590To uncompress all files in the directory "/my/home" that match "*.txt.gz" and store the uncompressed data in the same directory
591
592 use strict ;
593 use warnings ;
594 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
595
596 gunzip '</my/home/*.txt.gz>' => '</my/home/#1.txt>'
597 or die "gunzip failed: $GunzipError\n";
598
599and if you want to uncompress each file one at a time, this will do the trick
600
601 use strict ;
602 use warnings ;
603 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
604
605 for my $input ( glob "/my/home/*.txt.gz" )
606 {
607 my $output = $input;
608 $output =~ s/\.gz$// ;
609 gunzip $input => $output
610 or die "Error uncompressing '$input': $GunzipError\n";
611 }
612
613=head1 OO Interface
614
615=head2 Constructor
616
617The format of the constructor for IO::Uncompress::Gunzip is shown below
618
619
620 my $z = new IO::Uncompress::Gunzip $input [OPTS]
621 or die "IO::Uncompress::Gunzip failed: $GunzipError\n";
622
623Returns an C<IO::Uncompress::Gunzip> object on success and undef on failure.
624The variable C<$GunzipError> will contain an error message on failure.
625
1a6a8453
PM
626If you are running Perl 5.005 or better the object, C<$z>, returned from
627IO::Uncompress::Gunzip can be used exactly like an L<IO::File|IO::File> filehandle.
628This means that all normal input file operations can be carried out with
629C<$z>. For example, to read a line from a compressed file/buffer you can
630use either of these forms
642e522c
RGS
631
632 $line = $z->getline();
633 $line = <$z>;
634
635The mandatory parameter C<$input> is used to determine the source of the
636compressed data. This parameter can take one of three forms.
637
638=over 5
639
640=item A filename
641
642If the C<$input> parameter is a scalar, it is assumed to be a filename. This
643file will be opened for reading and the compressed data will be read from it.
644
645=item A filehandle
646
647If the C<$input> parameter is a filehandle, the compressed data will be
648read from it.
649The string '-' can be used as an alias for standard input.
650
651
652=item A scalar reference
653
654If C<$input> is a scalar reference, the compressed data will be read from
655C<$$input>.
656
657=back
658
659=head2 Constructor Options
660
661
662The option names defined below are case insensitive and can be optionally
663prefixed by a '-'. So all of the following are valid
664
665 -AutoClose
666 -autoclose
667 AUTOCLOSE
668 autoclose
669
670OPTS is a combination of the following options:
671
672=over 5
673
e7d45986 674=item C<< AutoClose => 0|1 >>
642e522c
RGS
675
676This option is only valid when the C<$input> parameter is a filehandle. If
677specified, and the value is true, it will result in the file being closed once
678either the C<close> method is called or the IO::Uncompress::Gunzip object is
679destroyed.
680
681This parameter defaults to 0.
682
e7d45986 683=item C<< MultiStream => 0|1 >>
642e522c
RGS
684
685
686
687Allows multiple concatenated compressed streams to be treated as a single
688compressed stream. Decompression will stop once either the end of the
689file/buffer is reached, an error is encountered (premature eof, corrupt
690compressed data) or the end of a stream is not immediately followed by the
691start of another stream.
692
693This parameter defaults to 0.
694
695
e7d45986 696=item C<< Prime => $string >>
642e522c
RGS
697
698This option will uncompress the contents of C<$string> before processing the
699input file/buffer.
700
701This option can be useful when the compressed data is embedded in another
702file/data structure and it is not possible to work out where the compressed
1a6a8453
PM
703data begins without having to read the first few bytes. If this is the
704case, the uncompression can be I<primed> with these bytes using this
705option.
642e522c 706
e7d45986 707=item C<< Transparent => 0|1 >>
642e522c 708
f6fd7794 709If this option is set and the input file/buffer is not compressed data,
642e522c
RGS
710the module will allow reading of it anyway.
711
f6fd7794
PM
712In addition, if the input file/buffer does contain compressed data and
713there is non-compressed data immediately following it, setting this option
714will make this module treat the whole file/buffer as a single data stream.
715
642e522c
RGS
716This option defaults to 1.
717
e7d45986 718=item C<< BlockSize => $num >>
642e522c 719
1a6a8453
PM
720When reading the compressed input data, IO::Uncompress::Gunzip will read it in
721blocks of C<$num> bytes.
642e522c
RGS
722
723This option defaults to 4096.
724
e7d45986 725=item C<< InputLength => $size >>
642e522c 726
1a6a8453
PM
727When present this option will limit the number of compressed bytes read
728from the input file/buffer to C<$size>. This option can be used in the
729situation where there is useful data directly after the compressed data
730stream and you know beforehand the exact length of the compressed data
731stream.
642e522c 732
1a6a8453
PM
733This option is mostly used when reading from a filehandle, in which case
734the file pointer will be left pointing to the first byte directly after the
642e522c
RGS
735compressed data stream.
736
737
738
739This option defaults to off.
740
e7d45986 741=item C<< Append => 0|1 >>
642e522c
RGS
742
743This option controls what the C<read> method does with uncompressed data.
744
1a6a8453
PM
745If set to 1, all uncompressed data will be appended to the output parameter
746of the C<read> method.
642e522c 747
1a6a8453
PM
748If set to 0, the contents of the output parameter of the C<read> method
749will be overwritten by the uncompressed data.
642e522c
RGS
750
751Defaults to 0.
752
e7d45986 753=item C<< Strict => 0|1 >>
642e522c
RGS
754
755
756
757This option controls whether the extra checks defined below are used when
1a6a8453
PM
758carrying out the decompression. When Strict is on, the extra tests are
759carried out, when Strict is off they are not.
642e522c
RGS
760
761The default for this option is off.
762
763
764
765
766
767
768
769
770
771=over 5
772
773=item 1
774
775If the FHCRC bit is set in the gzip FLG header byte, the CRC16 bytes in the
776header must match the crc16 value of the gzip header actually read.
777
778=item 2
779
780If the gzip header contains a name field (FNAME) it consists solely of ISO
7818859-1 characters.
782
783=item 3
784
1a6a8453
PM
785If the gzip header contains a comment field (FCOMMENT) it consists solely
786of ISO 8859-1 characters plus line-feed.
642e522c
RGS
787
788=item 4
789
790If the gzip FEXTRA header field is present it must conform to the sub-field
a02d0f6f 791structure as defined in RFC 1952.
642e522c
RGS
792
793=item 5
794
795The CRC32 and ISIZE trailer fields must be present.
796
797=item 6
798
799The value of the CRC32 field read must match the crc32 value of the
800uncompressed data actually contained in the gzip file.
801
802=item 7
803
1a6a8453
PM
804The value of the ISIZE fields read must match the length of the
805uncompressed data actually read from the file.
642e522c
RGS
806
807=back
808
809
810
811
812
813
642e522c 814
258133d1 815=item C<< ParseExtra => 0|1 >>
642e522c
RGS
816If the gzip FEXTRA header field is present and this option is set, it will
817force the module to check that it conforms to the sub-field structure as
a02d0f6f 818defined in RFC 1952.
642e522c
RGS
819
820If the C<Strict> option is on, it will automatically enable this option.
821
822Defaults to 0.
823
824
825
a02d0f6f 826
642e522c
RGS
827=back
828
829=head2 Examples
830
831TODO
832
833=head1 Methods
834
835=head2 read
836
837Usage is
838
839 $status = $z->read($buffer)
840
841Reads a block of compressed data (the size of the compressed block is
842determined by the C<BlockSize> option in the constructor), uncompresses it and
1a6a8453
PM
843writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
844set in the constructor, the uncompressed data will be appended to the
845C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
642e522c 846
1a6a8453
PM
847Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
848or a negative number on error.
642e522c
RGS
849
850=head2 read
851
852Usage is
853
854 $status = $z->read($buffer, $length)
855 $status = $z->read($buffer, $length, $offset)
856
857 $status = read($z, $buffer, $length)
858 $status = read($z, $buffer, $length, $offset)
859
860Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
861
1a6a8453
PM
862The main difference between this form of the C<read> method and the
863previous one, is that this one will attempt to return I<exactly> C<$length>
864bytes. The only circumstances that this function will not is if end-of-file
865or an IO error is encountered.
642e522c 866
1a6a8453
PM
867Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
868or a negative number on error.
642e522c
RGS
869
870
871=head2 getline
872
873Usage is
874
875 $line = $z->getline()
876 $line = <$z>
877
878Reads a single line.
879
258133d1
PM
880This method fully supports the use of the variable C<$/> (or
881C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
882determine what constitutes an end of line. Paragraph mode, record mode and
883file slurp mode are all supported.
642e522c
RGS
884
885
886=head2 getc
887
888Usage is
889
890 $char = $z->getc()
891
892Read a single character.
893
894=head2 ungetc
895
896Usage is
897
898 $char = $z->ungetc($string)
899
900
a02d0f6f 901
642e522c
RGS
902=head2 inflateSync
903
904Usage is
905
906 $status = $z->inflateSync()
907
908TODO
909
a02d0f6f 910
642e522c
RGS
911=head2 getHeaderInfo
912
913Usage is
914
1a6a8453
PM
915 $hdr = $z->getHeaderInfo();
916 @hdrs = $z->getHeaderInfo();
642e522c 917
1a6a8453
PM
918This method returns either a hash reference (in scalar context) or a list
919of hash references (in array context) that contains information about each
920of the header fields in the compressed data stream(s).
642e522c
RGS
921
922
923
1a6a8453 924=over 5
642e522c 925
1a6a8453 926=item Name
642e522c 927
1a6a8453
PM
928The contents of the Name header field, if present. If no name is
929present, the value will be undef. Note this is different from a zero length
930name, which will return an empty string.
642e522c
RGS
931
932=item Comment
933
1a6a8453
PM
934The contents of the Comment header field, if present. If no comment is
935present, the value will be undef. Note this is different from a zero length
936comment, which will return an empty string.
642e522c
RGS
937
938=back
939
940
941
942
943=head2 tell
944
945Usage is
946
947 $z->tell()
948 tell $z
949
950Returns the uncompressed file offset.
951
952=head2 eof
953
954Usage is
955
956 $z->eof();
957 eof($z);
958
959
960
961Returns true if the end of the compressed input stream has been reached.
962
963
964
965=head2 seek
966
967 $z->seek($position, $whence);
968 seek($z, $position, $whence);
969
970
971
972
973Provides a sub-set of the C<seek> functionality, with the restriction
974that it is only legal to seek forward in the input file/buffer.
975It is a fatal error to attempt to seek backward.
976
977
978
979The C<$whence> parameter takes one the usual values, namely SEEK_SET,
980SEEK_CUR or SEEK_END.
981
982Returns 1 on success, 0 on failure.
983
984=head2 binmode
985
986Usage is
987
988 $z->binmode
989 binmode $z ;
990
991This is a noop provided for completeness.
992
a02d0f6f
RGS
993=head2 opened
994
995 $z->opened()
996
997Returns true if the object currently refers to an opened file/buffer.
998
999=head2 autoflush
1000
1001 my $prev = $z->autoflush()
1002 my $prev = $z->autoflush(EXPR)
1003
1004If the C<$z> object is associated with a file or a filehandle, this method
1005returns the current autoflush setting for the underlying filehandle. If
1006C<EXPR> is present, and is non-zero, it will enable flushing after every
1007write/print operation.
1008
1009If C<$z> is associated with a buffer, this method has no effect and always
1010returns C<undef>.
1011
1012B<Note> that the special variable C<$|> B<cannot> be used to set or
1013retrieve the autoflush setting.
1014
1015=head2 input_line_number
1016
1017 $z->input_line_number()
1018 $z->input_line_number(EXPR)
1019
1020
1021
1022Returns the current uncompressed line number. If C<EXPR> is present it has
1023the effect of setting the line number. Note that setting the line number
1024does not change the current position within the file/buffer being read.
1025
1026The contents of C<$/> are used to determine what constitutes a line
1027terminator.
1028
1029
1030
642e522c
RGS
1031=head2 fileno
1032
1033 $z->fileno()
1034 fileno($z)
1035
a02d0f6f
RGS
1036If the C<$z> object is associated with a file or a filehandle, this method
1037will return the underlying file descriptor.
642e522c
RGS
1038
1039If the C<$z> object is associated with a buffer, this method will
1040return undef.
1041
1042=head2 close
1043
1044 $z->close() ;
1045 close $z ;
1046
1047
1048
1049Closes the input file/buffer.
1050
1051
1052
1053For most versions of Perl this method will be automatically invoked if
1054the IO::Uncompress::Gunzip object is destroyed (either explicitly or by the
1055variable with the reference to the object going out of scope). The
1056exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1057these cases, the C<close> method will be called automatically, but
1058not until global destruction of all live objects when the program is
1059terminating.
1060
1061Therefore, if you want your scripts to be able to run on all versions
1062of Perl, you should call C<close> explicitly and not rely on automatic
1063closing.
1064
1065Returns true on success, otherwise 0.
1066
1067If the C<AutoClose> option has been enabled when the IO::Uncompress::Gunzip
1068object was created, and the object is associated with a file, the
1069underlying file will also be closed.
1070
1071
1072
1073
e7d45986
PM
1074=head2 nextStream
1075
1076Usage is
1077
1078 my $status = $z->nextStream();
1079
1080Skips to the next compressed data stream in the input file/buffer. If a new
258133d1
PM
1081compressed data stream is found, the eof marker will be cleared and C<$.>
1082will be reset to 0.
e7d45986
PM
1083
1084Returns 1 if a new stream was found, 0 if none was found, and -1 if an
1085error was encountered.
1086
1087=head2 trailingData
1088
1089Usage is
1090
1091 my $data = $z->trailingData();
1092
258133d1
PM
1093Returns the data, if any, that is present immediately after the compressed
1094data stream once uncompression is complete. It only makes sense to call
1095this method once the end of the compressed data stream has been
1096encountered.
1097
1098This option can be used when there is useful information immediately
1099following the compressed data stream, and you don't know the length of the
1100compressed data stream.
1101
1102If the input is a buffer, C<trailingData> will return everything from the
1103end of the compressed data stream to the end of the buffer.
1104
1105If the input is a filehandle, C<trailingData> will return the data that is
1106left in the filehandle input buffer once the end of the compressed data
1107stream has been reached. You can then use the filehandle to read the rest
1108of the input file.
1109
1110Don't bother using C<trailingData> if the input is a filename.
1111
1112
1113
1114If you know the length of the compressed data stream before you start
1115uncompressing, you can avoid having to use C<trailingData> by setting the
1116C<InputLength> option in the constructor.
e7d45986 1117
642e522c
RGS
1118=head1 Importing
1119
1120No symbolic constants are required by IO::Uncompress::Gunzip at present.
1121
1122=over 5
1123
1124=item :all
1125
1126Imports C<gunzip> and C<$GunzipError>.
1127Same as doing this
1128
1129 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
1130
1131=back
1132
1133=head1 EXAMPLES
1134
1135
1136
1137
1138=head1 SEE ALSO
1139
258133d1 1140L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
642e522c
RGS
1141
1142L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
1143
a02d0f6f
RGS
1144L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1145L<Archive::Tar|Archive::Tar>,
642e522c
RGS
1146L<IO::Zlib|IO::Zlib>
1147
a02d0f6f 1148
642e522c
RGS
1149For RFC 1950, 1951 and 1952 see
1150F<http://www.faqs.org/rfcs/rfc1950.html>,
1151F<http://www.faqs.org/rfcs/rfc1951.html> and
1152F<http://www.faqs.org/rfcs/rfc1952.html>
1153
a02d0f6f
RGS
1154The I<zlib> compression library was written by Jean-loup Gailly
1155F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
1156
1157The primary site for the I<zlib> compression library is
1158F<http://www.zlib.org>.
1159
1160The primary site for gzip is F<http://www.gzip.org>.
1161
1162
1163
1164
642e522c
RGS
1165=head1 AUTHOR
1166
cb7abd7f 1167This module was written by Paul Marquess, F<pmqs@cpan.org>.
642e522c 1168
642e522c 1169
642e522c
RGS
1170
1171=head1 MODIFICATION HISTORY
1172
1173See the Changes file.
1174
1175=head1 COPYRIGHT AND LICENSE
642e522c 1176
b0cda13f 1177Copyright (c) 2005-2007 Paul Marquess. All rights reserved.
a02d0f6f 1178
642e522c
RGS
1179This program is free software; you can redistribute it and/or
1180modify it under the same terms as Perl itself.
1181