This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Re: [perl #40583] sprintf "%#04X" also uppercases the 0x-prefix
[perl5.git] / ext / Compress / IO / Zlib / lib / IO / Uncompress / Gunzip.pm
CommitLineData
642e522c
RGS
1
2package IO::Uncompress::Gunzip ;
3
4require 5.004 ;
5
6# for RFC1952
7
8use strict ;
9use warnings;
a02d0f6f 10use bytes;
642e522c 11
1a6a8453
PM
12use IO::Uncompress::RawInflate ;
13
a02d0f6f
RGS
14use Compress::Raw::Zlib qw( crc32 ) ;
15use IO::Compress::Base::Common qw(:Status createSelfTiedObject);
16use IO::Compress::Gzip::Constants;
c70c1701 17use IO::Compress::Zlib::Extra;
1a6a8453 18
642e522c
RGS
19require Exporter ;
20
# Package-level state: module version, inheritance chain, the symbols
# that may be exported on request, and the error string shared with callers.
our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $GunzipError);

$VERSION     = '2.000_13';
$GunzipError = '';

@ISA         = qw( Exporter IO::Uncompress::RawInflate );
@EXPORT_OK   = qw( $GunzipError gunzip );

# Start from the export tags published by the parent class, then add
# this module's own exportable symbols to the ":all" tag.
%EXPORT_TAGS = %IO::Uncompress::RawInflate::DEFLATE_CONSTANTS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');
642e522c 32
# Constructor. Clears $GunzipError, builds a self-tied object bound to
# that error variable and hands the remaining arguments to _create().
# The value returned is whatever _create() returns.
sub new
{
    my $class = shift ;

    $GunzipError = '';
    my $self = createSelfTiedObject($class, \$GunzipError);

    # @_ is forwarded untouched so that _create() sees the caller's
    # original arguments.
    return $self->_create(undef, 0, @_);
}
41
# Functional ("one-shot") interface. Builds a class-less self-tied object
# bound to $GunzipError and delegates all of the work to _inf(), passing
# the caller's arguments straight through.
sub gunzip
{
    my $worker = createSelfTiedObject(undef, \$GunzipError);

    return $worker->_inf(@_) ;
}
47
# Describe the constructor options that are specific to gunzip.
# Returns a key/value list with a single entry, 'ParseExtra', whose value
# is a four-element parameter specification of type Parse_boolean.
sub getExtraParams
{
    # Pull in the Parse_* constants used in the specification below.
    use IO::Compress::Base::Common qw(:Parse);

    my @parse_extra_spec = (1, 1, Parse_boolean, 0) ;

    return ( 'ParseExtra' => \@parse_extra_spec ) ;
}
53
# Post-process the parsed constructor parameters held in $got.
# Always returns 1.
sub ckParams
{
    my ($self, $got) = @_ ;

    # A gzip trailer carries a CRC32, so the CRC32 option is forced on
    # regardless of what the caller asked for.
    $got->value('CRC32' => 1);

    return 1;
}
64
# Read the gzip ID bytes from the input and check that they carry the
# gzip magic number.
#
# The bytes read are stored in HeaderPending before any check is made.
# On success the stream Type is set to 'rfc1952' and the magic bytes are
# returned; otherwise the result of HeaderError() is returned.
sub ckMagic
{
    my ($self) = @_ ;

    my $magic ;
    $self->smartReadExact(\$magic, GZIP_ID_SIZE);
    *$self->{HeaderPending} = $magic ;

    # A short read means the input cannot hold even a minimal header.
    if (length($magic) != GZIP_ID_SIZE) {
        return $self->HeaderError("Minimum header size is " .
                                  GZIP_MIN_HEADER_SIZE . " bytes") ;
    }

    unless (isGzipMagic($magic)) {
        return $self->HeaderError("Bad Magic") ;
    }

    *$self->{Type} = 'rfc1952';

    return $magic ;
}
85
# Parse the gzip header that follows the already-consumed magic bytes.
# Thin wrapper: returns whatever _readGzipHeader() returns.
sub readHeader
{
    my ($self, $magic) = @_ ;

    return $self->_readGzipHeader($magic);
}
93
# Decode the gzip trailer and, in Strict mode, validate it.
#
# $trailer is unpacked as two little-endian 32-bit values (CRC32, ISIZE),
# which are always recorded in the Info hash. When Strict is set they are
# compared with the values taken from the Uncomp and UnCompSize members.
# Returns STATUS_OK on success, or the result of TrailerError() on a
# mismatch.
sub chkTrailer
{
    my ($self, $trailer) = @_ ;

    my ($crc, $isize) = unpack("V V", $trailer) ;
    *$self->{Info}{CRC32} = $crc ;
    *$self->{Info}{ISIZE} = $isize ;

    # Without Strict the trailer values are recorded but not verified.
    return STATUS_OK
        unless *$self->{Strict} ;

    if ($crc != *$self->{Uncomp}->crc32()) {
        return $self->TrailerError("CRC mismatch") ;
    }

    my $expected = *$self->{UnCompSize}->get32bit();
    if ($isize != $expected) {
        return $self->TrailerError("ISIZE mismatch. Got $isize"
                                   . ", expected $expected") ;
    }

    return STATUS_OK;
}
642e522c 116
1a6a8453
PM
# Plain function (not a method): report whether $buffer starts with the
# two gzip ID bytes.
# Returns 0 when the buffer is shorter than GZIP_ID_SIZE, otherwise the
# (true/false) result of comparing the first two bytes with GZIP_ID1 and
# GZIP_ID2.
sub isGzipMagic
{
    my ($data) = @_ ;

    if (length($data) < GZIP_ID_SIZE) {
        return 0 ;
    }

    my @id = unpack("C C", $data) ;

    return $id[0] == GZIP_ID1 && $id[1] == GZIP_ID2 ;
}
124
# Read and validate a complete gzip header, magic bytes included.
#
# The ID bytes are read and stored in HeaderPending, checked for length
# and magic, and the rest of the header is then parsed by
# _readGzipHeader(). If that parse yields undef, the Transparent member
# is removed from the object. Returns the parse result, or the result of
# HeaderError() when the ID bytes are missing or wrong.
sub _readFullGzipHeader($)
{
    my $self = shift ;
    my $id   = '' ;

    $self->smartReadExact(\$id, GZIP_ID_SIZE);
    *$self->{HeaderPending} = $id ;

    if (length($id) != GZIP_ID_SIZE) {
        return $self->HeaderError("Minimum header size is " .
                                  GZIP_MIN_HEADER_SIZE . " bytes") ;
    }

    if (! isGzipMagic($id)) {
        return $self->HeaderError("Bad Magic") ;
    }

    my $header = $self->_readGzipHeader($id);

    defined $header
        or delete *$self->{Transparent} ;

    return $header ;
}
146
# Parse the part of a gzip (RFC 1952) member header that follows the two
# ID bytes.
#
# Parameters:
#   $self  - the stream object; must provide smartReadExact(), HeaderError()
#            and TruncatedHeader(), and is consulted for the 'Strict' and
#            'ParseExtra' settings.
#   $magic - the ID bytes already consumed by the caller. They are needed
#            here because they are part of the data covered by the header
#            CRC and of the raw header returned to the caller.
#
# Returns a hash reference describing the header on success. On any
# failure the result of HeaderError()/TruncatedHeader() is returned.
#
# The ($) prototype is retained as-is; Perl does not apply prototypes to
# method calls, which is how this sub is invoked in this file.
sub _readGzipHeader($)
{
    my ($self, $magic) = @_ ;
    my ($HeaderCRC) ;
    my ($buffer) = '' ;

    # Fixed-size part of the header: CM, FLG, MTIME, XFL, OS.
    $self->smartReadExact(\$buffer, GZIP_MIN_HEADER_SIZE - GZIP_ID_SIZE)
        or return $self->HeaderError("Minimum header size is " .
                                     GZIP_MIN_HEADER_SIZE . " bytes") ;

    # $keep accumulates every raw header byte read so far. It is published
    # via HeaderPending, used as the input of the header CRC, and returned
    # as 'Header'.
    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    # now split out the various parts
    my ($cm, $flag, $mtime, $xfl, $os) = unpack("C C V C C", $buffer) ;

    $cm == GZIP_CM_DEFLATED
        or return $self->HeaderError("Not Deflate (CM is $cm)") ;

    # check for use of reserved bits
    return $self->HeaderError("Use of Reserved Bits in FLG field.")
        if $flag & GZIP_FLG_RESERVED ;

    # Optional FEXTRA field: a 16-bit little-endian length followed by
    # that many bytes. $EXTRA stays undef when the field is absent.
    my $EXTRA ;
    my @EXTRA = () ;
    if ($flag & GZIP_FLG_FEXTRA) {
        $EXTRA = "" ;
        $self->smartReadExact(\$buffer, GZIP_FEXTRA_HEADER_SIZE)
            or return $self->TruncatedHeader("FEXTRA Length") ;

        my ($XLEN) = unpack("v", $buffer) ;
        $self->smartReadExact(\$EXTRA, $XLEN)
            or return $self->TruncatedHeader("FEXTRA Body");
        $keep .= $buffer . $EXTRA ;

        # Only split the field into sub-fields when asked to.
        if ($XLEN && *$self->{'ParseExtra'}) {
            my $bad = IO::Compress::Zlib::Extra::parseRawExtra($EXTRA,
                                                               \@EXTRA, 1, 1);
            return $self->HeaderError($bad)
                if defined $bad;
        }
    }

    # Optional FNAME field: NUL-terminated, read one byte at a time.
    # $origname stays undef when the field is absent.
    my $origname ;
    if ($flag & GZIP_FLG_FNAME) {
        $origname = "" ;
        while (1) {
            $self->smartReadExact(\$buffer, 1)
                or return $self->TruncatedHeader("FNAME");
            last if $buffer eq GZIP_NULL_BYTE ;
            $origname .= $buffer
        }
        $keep .= $origname . GZIP_NULL_BYTE ;

        return $self->HeaderError("Non ISO 8859-1 Character found in Name")
            if *$self->{Strict} && $origname =~ /$GZIP_FNAME_INVALID_CHAR_RE/o ;
    }

    # Optional FCOMMENT field: same layout as FNAME.
    my $comment ;
    if ($flag & GZIP_FLG_FCOMMENT) {
        $comment = "";
        while (1) {
            $self->smartReadExact(\$buffer, 1)
                or return $self->TruncatedHeader("FCOMMENT");
            last if $buffer eq GZIP_NULL_BYTE ;
            $comment .= $buffer
        }
        $keep .= $comment . GZIP_NULL_BYTE ;

        return $self->HeaderError("Non ISO 8859-1 Character found in Comment")
            if *$self->{Strict} && $comment =~ /$GZIP_FCOMMENT_INVALID_CHAR_RE/o ;
    }

    # Optional header CRC.
    if ($flag & GZIP_FLG_FHCRC) {
        $self->smartReadExact(\$buffer, GZIP_FHCRC_SIZE)
            or return $self->TruncatedHeader("FHCRC");

        $HeaderCRC = unpack("v", $buffer) ;

        # RFC 1952 defines CRC16 as the two least significant bytes of the
        # CRC32 of all header bytes preceding it, so keep 16 bits. (Masking
        # with 0xFF kept only 8 bits and made valid headers fail in Strict
        # mode.)
        my $crc16 = crc32($keep) & 0xFFFF ;

        return $self->HeaderError("CRC16 mismatch.")
            if *$self->{Strict} && $crc16 != $HeaderCRC;

        $keep .= $buffer ;
    }

    # Assume compression method is deflated for xfl tests
    #if ($xfl) {
    #}

    *$self->{Type} = 'rfc1952';

    return {
        'Type'              => 'rfc1952',
        'FingerprintLength' => 2,
        'HeaderLength'      => length $keep,
        'TrailerLength'     => GZIP_TRAILER_SIZE,
        'Header'            => $keep,
        'isMinimalHeader'   => $keep eq GZIP_MINIMUM_HEADER ? 1 : 0,

        'MethodID'          => $cm,
        'MethodName'        => $cm == GZIP_CM_DEFLATED ? "Deflated" : "Unknown" ,
        'TextFlag'          => $flag & GZIP_FLG_FTEXT ? 1 : 0,
        'HeaderCRCFlag'     => $flag & GZIP_FLG_FHCRC ? 1 : 0,
        'NameFlag'          => $flag & GZIP_FLG_FNAME ? 1 : 0,
        'CommentFlag'       => $flag & GZIP_FLG_FCOMMENT ? 1 : 0,
        'ExtraFlag'         => $flag & GZIP_FLG_FEXTRA ? 1 : 0,
        'Name'              => $origname,
        'Comment'           => $comment,
        'Time'              => $mtime,
        'OsID'              => $os,
        'OsName'            => defined $GZIP_OS_Names{$os}
                                 ? $GZIP_OS_Names{$os} : "Unknown",
        'HeaderCRC'         => $HeaderCRC,
        'Flags'             => $flag,
        'ExtraFlags'        => $xfl,
        'ExtraFieldRaw'     => $EXTRA,
        'ExtraField'        => [ @EXTRA ],

        #'CompSize'=> $compsize,
        #'CRC32'=> $CRC32,
        #'OrigSize'=> $ISIZE,
      }
}
272
273
1a6a8453 2741;
642e522c 275
642e522c
RGS
276__END__
277
278
279=head1 NAME
280
a02d0f6f 281
cb7abd7f
PM
282
283IO::Uncompress::Gunzip - Read RFC 1952 files/buffers
284
642e522c 285
a02d0f6f 286
642e522c
RGS
287=head1 SYNOPSIS
288
289 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
290
291 my $status = gunzip $input => $output [,OPTS]
292 or die "gunzip failed: $GunzipError\n";
293
294 my $z = new IO::Uncompress::Gunzip $input [OPTS]
295 or die "gunzip failed: $GunzipError\n";
296
297 $status = $z->read($buffer)
298 $status = $z->read($buffer, $length)
299 $status = $z->read($buffer, $length, $offset)
300 $line = $z->getline()
301 $char = $z->getc()
302 $char = $z->ungetc()
a02d0f6f
RGS
303 $char = $z->opened()
304
642e522c 305 $status = $z->inflateSync()
a02d0f6f 306
e7d45986
PM
307 $data = $z->trailingData()
308 $status = $z->nextStream()
642e522c
RGS
309 $data = $z->getHeaderInfo()
310 $z->tell()
311 $z->seek($position, $whence)
312 $z->binmode()
313 $z->fileno()
314 $z->eof()
315 $z->close()
316
317 $GunzipError ;
318
319 # IO::File mode
320
321 <$z>
322 read($z, $buffer);
323 read($z, $buffer, $length);
324 read($z, $buffer, $length, $offset);
325 tell($z)
326 seek($z, $position, $whence)
327 binmode($z)
328 fileno($z)
329 eof($z)
330 close($z)
331
332
333=head1 DESCRIPTION
334
335
336
337B<WARNING -- This is a Beta release>.
338
339=over 5
340
341=item * DO NOT use in production code.
342
343=item * The documentation is incomplete in places.
344
345=item * Parts of the interface defined here are tentative.
346
347=item * Please report any problems you find.
348
349=back
350
351
352
353
354
1a6a8453 355This module provides a Perl interface that allows the reading of
642e522c
RGS
356files/buffers that conform to RFC 1952.
357
1a6a8453 358For writing RFC 1952 files/buffers, see the companion module IO::Compress::Gzip.
642e522c
RGS
359
360
361
cb7abd7f
PM
362
363
364
642e522c
RGS
365=head1 Functional Interface
366
1a6a8453
PM
367A top-level function, C<gunzip>, is provided to carry out
368"one-shot" uncompression between buffers and/or files. For finer
369control over the uncompression process, see the L</"OO Interface">
370section.
642e522c
RGS
371
372 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
373
374 gunzip $input => $output [,OPTS]
375 or die "gunzip failed: $GunzipError\n";
376
1a6a8453 377
642e522c
RGS
378
379The functional interface needs Perl5.005 or better.
380
381
382=head2 gunzip $input => $output [, OPTS]
383
1a6a8453
PM
384
385C<gunzip> expects at least two parameters, C<$input> and C<$output>.
642e522c
RGS
386
387=head3 The C<$input> parameter
388
389The parameter, C<$input>, is used to define the source of
390the compressed data.
391
392It can take one of the following forms:
393
394=over 5
395
396=item A filename
397
398If the C<$input> parameter is a simple scalar, it is assumed to be a
399filename. This file will be opened for reading and the input data
400will be read from it.
401
402=item A filehandle
403
404If the C<$input> parameter is a filehandle, the input data will be
405read from it.
406The string '-' can be used as an alias for standard input.
407
408=item A scalar reference
409
410If C<$input> is a scalar reference, the input data will be read
411from C<$$input>.
412
413=item An array reference
414
1a6a8453
PM
415If C<$input> is an array reference, each element in the array must be a
416filename.
417
418The input data will be read from each file in turn.
419
642e522c 420The complete array will be walked to ensure that it only
1a6a8453
PM
421contains valid filenames before any data is uncompressed.
422
423
642e522c
RGS
424
425=item An Input FileGlob string
426
427If C<$input> is a string that is delimited by the characters "<" and ">"
428C<gunzip> will assume that it is an I<input fileglob string>. The
429input is the list of files that match the fileglob.
430
431If the fileglob does not match any files ...
432
433See L<File::GlobMapper|File::GlobMapper> for more details.
434
435
436=back
437
438If the C<$input> parameter is any other type, C<undef> will be returned.
439
440
441
442=head3 The C<$output> parameter
443
444The parameter C<$output> is used to control the destination of the
445uncompressed data. This parameter can take one of these forms.
446
447=over 5
448
449=item A filename
450
1a6a8453
PM
451If the C<$output> parameter is a simple scalar, it is assumed to be a
452filename. This file will be opened for writing and the uncompressed
453data will be written to it.
642e522c
RGS
454
455=item A filehandle
456
1a6a8453
PM
457If the C<$output> parameter is a filehandle, the uncompressed data
458will be written to it.
642e522c
RGS
459The string '-' can be used as an alias for standard output.
460
461
462=item A scalar reference
463
1a6a8453
PM
464If C<$output> is a scalar reference, the uncompressed data will be
465stored in C<$$output>.
642e522c
RGS
466
467
642e522c
RGS
468
469=item An Array Reference
470
1a6a8453
PM
471If C<$output> is an array reference, the uncompressed data will be
472pushed onto the array.
642e522c
RGS
473
474=item An Output FileGlob
475
476If C<$output> is a string that is delimited by the characters "<" and ">"
477C<gunzip> will assume that it is an I<output fileglob string>. The
478output is the list of files that match the fileglob.
479
480When C<$output> is a fileglob string, C<$input> must also be a fileglob
481string. Anything else is an error.
482
483=back
484
485If the C<$output> parameter is any other type, C<undef> will be returned.
486
642e522c 487
642e522c
RGS
488
489=head2 Notes
490
c70c1701
PM
491
492When C<$input> maps to multiple compressed files/buffers and C<$output> is
493a single file/buffer, after uncompression C<$output> will contain a
494concatenation of all the uncompressed data from each of the input
495files/buffers.
496
497
642e522c
RGS
498
499
500
501=head2 Optional Parameters
502
503Unless specified below, the optional parameters for C<gunzip>,
504C<OPTS>, are the same as those used with the OO interface defined in the
505L</"Constructor Options"> section below.
506
507=over 5
508
e7d45986 509=item C<< AutoClose => 0|1 >>
642e522c 510
1a6a8453
PM
511This option applies to any input or output data streams to
512C<gunzip> that are filehandles.
642e522c
RGS
513
514If C<AutoClose> is specified, and the value is true, it will result in all
515input and/or output filehandles being closed once C<gunzip> has
516completed.
517
518This parameter defaults to 0.
519
520
e7d45986 521=item C<< BinModeOut => 0|1 >>
1a6a8453
PM
522
523When writing to a file or filehandle, set C<binmode> before writing to the
524file.
525
526Defaults to 0.
527
528
529
530
531
e7d45986 532=item C<< Append => 0|1 >>
642e522c
RGS
533
534TODO
535
e7d45986 536=item C<< MultiStream => 0|1 >>
1a6a8453 537
e7d45986
PM
538If the input file/buffer contains multiple compressed data streams, this
539option will uncompress the whole lot as a single data stream.
1a6a8453 540
e7d45986 541Defaults to 0.
1a6a8453 542
642e522c
RGS
543
544
545=back
546
547
548
549
550=head2 Examples
551
552To read the contents of the file C<file1.txt.gz> and write the
553uncompressed data to the file C<file1.txt>.
554
555 use strict ;
556 use warnings ;
557 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
558
559 my $input = "file1.txt.gz";
560 my $output = "file1.txt";
561 gunzip $input => $output
562 or die "gunzip failed: $GunzipError\n";
563
564
565To read from an existing Perl filehandle, C<$input>, and write the
566uncompressed data to a buffer, C<$buffer>.
567
568 use strict ;
569 use warnings ;
570 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
571 use IO::File ;
572
573 my $input = new IO::File "<file1.txt.gz"
574 or die "Cannot open 'file1.txt.gz': $!\n" ;
575 my $buffer ;
576 gunzip $input => \$buffer
577 or die "gunzip failed: $GunzipError\n";
578
579To uncompress all files in the directory "/my/home" that match "*.txt.gz" and store the uncompressed data in the same directory
580
581 use strict ;
582 use warnings ;
583 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
584
585 gunzip '</my/home/*.txt.gz>' => '</my/home/#1.txt>'
586 or die "gunzip failed: $GunzipError\n";
587
588and if you want to uncompress each file one at a time, this will do the trick
589
590 use strict ;
591 use warnings ;
592 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
593
594 for my $input ( glob "/my/home/*.txt.gz" )
595 {
596 my $output = $input;
597 $output =~ s/\.gz$// ;
598 gunzip $input => $output
599 or die "Error uncompressing '$input': $GunzipError\n";
600 }
601
602=head1 OO Interface
603
604=head2 Constructor
605
606The format of the constructor for IO::Uncompress::Gunzip is shown below
607
608
609 my $z = new IO::Uncompress::Gunzip $input [OPTS]
610 or die "IO::Uncompress::Gunzip failed: $GunzipError\n";
611
612Returns an C<IO::Uncompress::Gunzip> object on success and undef on failure.
613The variable C<$GunzipError> will contain an error message on failure.
614
1a6a8453
PM
615If you are running Perl 5.005 or better the object, C<$z>, returned from
616IO::Uncompress::Gunzip can be used exactly like an L<IO::File|IO::File> filehandle.
617This means that all normal input file operations can be carried out with
618C<$z>. For example, to read a line from a compressed file/buffer you can
619use either of these forms
642e522c
RGS
620
621 $line = $z->getline();
622 $line = <$z>;
623
624The mandatory parameter C<$input> is used to determine the source of the
625compressed data. This parameter can take one of three forms.
626
627=over 5
628
629=item A filename
630
631If the C<$input> parameter is a scalar, it is assumed to be a filename. This
632file will be opened for reading and the compressed data will be read from it.
633
634=item A filehandle
635
636If the C<$input> parameter is a filehandle, the compressed data will be
637read from it.
638The string '-' can be used as an alias for standard input.
639
640
641=item A scalar reference
642
643If C<$input> is a scalar reference, the compressed data will be read from
644C<$$input>.
645
646=back
647
648=head2 Constructor Options
649
650
651The option names defined below are case insensitive and can be optionally
652prefixed by a '-'. So all of the following are valid
653
654 -AutoClose
655 -autoclose
656 AUTOCLOSE
657 autoclose
658
659OPTS is a combination of the following options:
660
661=over 5
662
e7d45986 663=item C<< AutoClose => 0|1 >>
642e522c
RGS
664
665This option is only valid when the C<$input> parameter is a filehandle. If
666specified, and the value is true, it will result in the file being closed once
667either the C<close> method is called or the IO::Uncompress::Gunzip object is
668destroyed.
669
670This parameter defaults to 0.
671
e7d45986 672=item C<< MultiStream => 0|1 >>
642e522c
RGS
673
674
675
676Allows multiple concatenated compressed streams to be treated as a single
677compressed stream. Decompression will stop once either the end of the
678file/buffer is reached, an error is encountered (premature eof, corrupt
679compressed data) or the end of a stream is not immediately followed by the
680start of another stream.
681
682This parameter defaults to 0.
683
684
e7d45986 685=item C<< Prime => $string >>
642e522c
RGS
686
687This option will uncompress the contents of C<$string> before processing the
688input file/buffer.
689
690This option can be useful when the compressed data is embedded in another
691file/data structure and it is not possible to work out where the compressed
1a6a8453
PM
692data begins without having to read the first few bytes. If this is the
693case, the uncompression can be I<primed> with these bytes using this
694option.
642e522c 695
e7d45986 696=item C<< Transparent => 0|1 >>
642e522c
RGS
697
698If this option is set and the input file or buffer is not compressed data,
699the module will allow reading of it anyway.
700
701This option defaults to 1.
702
e7d45986 703=item C<< BlockSize => $num >>
642e522c 704
1a6a8453
PM
705When reading the compressed input data, IO::Uncompress::Gunzip will read it in
706blocks of C<$num> bytes.
642e522c
RGS
707
708This option defaults to 4096.
709
e7d45986 710=item C<< InputLength => $size >>
642e522c 711
1a6a8453
PM
712When present this option will limit the number of compressed bytes read
713from the input file/buffer to C<$size>. This option can be used in the
714situation where there is useful data directly after the compressed data
715stream and you know beforehand the exact length of the compressed data
716stream.
642e522c 717
1a6a8453
PM
718This option is mostly used when reading from a filehandle, in which case
719the file pointer will be left pointing to the first byte directly after the
642e522c
RGS
720compressed data stream.
721
722
723
724This option defaults to off.
725
e7d45986 726=item C<< Append => 0|1 >>
642e522c
RGS
727
728This option controls what the C<read> method does with uncompressed data.
729
1a6a8453
PM
730If set to 1, all uncompressed data will be appended to the output parameter
731of the C<read> method.
642e522c 732
1a6a8453
PM
733If set to 0, the contents of the output parameter of the C<read> method
734will be overwritten by the uncompressed data.
642e522c
RGS
735
736Defaults to 0.
737
e7d45986 738=item C<< Strict => 0|1 >>
642e522c
RGS
739
740
741
742This option controls whether the extra checks defined below are used when
1a6a8453
PM
743carrying out the decompression. When Strict is on, the extra tests are
744carried out, when Strict is off they are not.
642e522c
RGS
745
746The default for this option is off.
747
748
749
750
751
752
753
754
755
756=over 5
757
758=item 1
759
760If the FHCRC bit is set in the gzip FLG header byte, the CRC16 bytes in the
761header must match the crc16 value of the gzip header actually read.
762
763=item 2
764
765If the gzip header contains a name field (FNAME) it consists solely of ISO
7668859-1 characters.
767
768=item 3
769
1a6a8453
PM
770If the gzip header contains a comment field (FCOMMENT) it consists solely
771of ISO 8859-1 characters plus line-feed.
642e522c
RGS
772
773=item 4
774
775If the gzip FEXTRA header field is present it must conform to the sub-field
a02d0f6f 776structure as defined in RFC 1952.
642e522c
RGS
777
778=item 5
779
780The CRC32 and ISIZE trailer fields must be present.
781
782=item 6
783
784The value of the CRC32 field read must match the crc32 value of the
785uncompressed data actually contained in the gzip file.
786
787=item 7
788
1a6a8453
PM
789The value of the ISIZE fields read must match the length of the
790uncompressed data actually read from the file.
642e522c
RGS
791
792=back
793
794
795
796
797
798
e7d45986 799=item C<< ParseExtra => 0|1 >>
642e522c
RGS
800
801If the gzip FEXTRA header field is present and this option is set, it will
802force the module to check that it conforms to the sub-field structure as
a02d0f6f 803defined in RFC 1952.
642e522c
RGS
804
805If the C<Strict> option is on it will automatically enable this option.
806
807Defaults to 0.
808
809
810
a02d0f6f
RGS
811
812
642e522c
RGS
813=back
814
815=head2 Examples
816
817TODO
818
819=head1 Methods
820
821=head2 read
822
823Usage is
824
825 $status = $z->read($buffer)
826
827Reads a block of compressed data (the size of the compressed block is
828determined by the C<BlockSize> option in the constructor), uncompresses it and
1a6a8453
PM
829writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
830set in the constructor, the uncompressed data will be appended to the
831C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
642e522c 832
1a6a8453
PM
833Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
834or a negative number on error.
642e522c
RGS
835
836=head2 read
837
838Usage is
839
840 $status = $z->read($buffer, $length)
841 $status = $z->read($buffer, $length, $offset)
842
843 $status = read($z, $buffer, $length)
844 $status = read($z, $buffer, $length, $offset)
845
846Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
847
1a6a8453
PM
848The main difference between this form of the C<read> method and the
849previous one, is that this one will attempt to return I<exactly> C<$length>
850bytes. The only circumstances in which this function will not do so are when
851end-of-file or an IO error is encountered.
642e522c 852
1a6a8453
PM
853Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
854or a negative number on error.
642e522c
RGS
855
856
857=head2 getline
858
859Usage is
860
861 $line = $z->getline()
862 $line = <$z>
863
864Reads a single line.
865
866This method fully supports the use of the variable C<$/>
867(or C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
868determine what constitutes an end of line. Both paragraph mode and file
869slurp mode are supported.
870
871
872=head2 getc
873
874Usage is
875
876 $char = $z->getc()
877
878Read a single character.
879
880=head2 ungetc
881
882Usage is
883
884 $char = $z->ungetc($string)
885
886
a02d0f6f 887
642e522c
RGS
888=head2 inflateSync
889
890Usage is
891
892 $status = $z->inflateSync()
893
894TODO
895
a02d0f6f 896
642e522c
RGS
897=head2 getHeaderInfo
898
899Usage is
900
1a6a8453
PM
901 $hdr = $z->getHeaderInfo();
902 @hdrs = $z->getHeaderInfo();
642e522c 903
1a6a8453
PM
904This method returns either a hash reference (in scalar context) or a list
905of hash references (in array context) that contains information about each
906of the header fields in the compressed data stream(s).
642e522c
RGS
907
908
909
1a6a8453 910=over 5
642e522c 911
1a6a8453 912=item Name
642e522c 913
1a6a8453
PM
914The contents of the Name header field, if present. If no name is
915present, the value will be undef. Note this is different from a zero length
916name, which will return an empty string.
642e522c
RGS
917
918=item Comment
919
1a6a8453
PM
920The contents of the Comment header field, if present. If no comment is
921present, the value will be undef. Note this is different from a zero length
922comment, which will return an empty string.
642e522c
RGS
923
924=back
925
926
927
928
929=head2 tell
930
931Usage is
932
933 $z->tell()
934 tell $z
935
936Returns the uncompressed file offset.
937
938=head2 eof
939
940Usage is
941
942 $z->eof();
943 eof($z);
944
945
946
947Returns true if the end of the compressed input stream has been reached.
948
949
950
951=head2 seek
952
953 $z->seek($position, $whence);
954 seek($z, $position, $whence);
955
956
957
958
959Provides a sub-set of the C<seek> functionality, with the restriction
960that it is only legal to seek forward in the input file/buffer.
961It is a fatal error to attempt to seek backward.
962
963
964
965The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
966SEEK_CUR or SEEK_END.
967
968Returns 1 on success, 0 on failure.
969
970=head2 binmode
971
972Usage is
973
974 $z->binmode
975 binmode $z ;
976
977This is a noop provided for completeness.
978
a02d0f6f
RGS
979=head2 opened
980
981 $z->opened()
982
983Returns true if the object currently refers to an opened file/buffer.
984
985=head2 autoflush
986
987 my $prev = $z->autoflush()
988 my $prev = $z->autoflush(EXPR)
989
990If the C<$z> object is associated with a file or a filehandle, this method
991returns the current autoflush setting for the underlying filehandle. If
992C<EXPR> is present, and is non-zero, it will enable flushing after every
993write/print operation.
994
995If C<$z> is associated with a buffer, this method has no effect and always
996returns C<undef>.
997
998B<Note> that the special variable C<$|> B<cannot> be used to set or
999retrieve the autoflush setting.
1000
1001=head2 input_line_number
1002
1003 $z->input_line_number()
1004 $z->input_line_number(EXPR)
1005
1006
1007
1008Returns the current uncompressed line number. If C<EXPR> is present it has
1009the effect of setting the line number. Note that setting the line number
1010does not change the current position within the file/buffer being read.
1011
1012The contents of C<$/> are used to determine what constitutes a line
1013terminator.
1014
1015
1016
642e522c
RGS
1017=head2 fileno
1018
1019 $z->fileno()
1020 fileno($z)
1021
a02d0f6f
RGS
1022If the C<$z> object is associated with a file or a filehandle, this method
1023will return the underlying file descriptor.
642e522c
RGS
1024
1025If the C<$z> object is associated with a buffer, this method will
1026return undef.
1027
1028=head2 close
1029
1030 $z->close() ;
1031 close $z ;
1032
1033
1034
1035Closes the output file/buffer.
1036
1037
1038
1039For most versions of Perl this method will be automatically invoked if
1040the IO::Uncompress::Gunzip object is destroyed (either explicitly or by the
1041variable with the reference to the object going out of scope). The
1042exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1043these cases, the C<close> method will be called automatically, but
1044not until global destruction of all live objects when the program is
1045terminating.
1046
1047Therefore, if you want your scripts to be able to run on all versions
1048of Perl, you should call C<close> explicitly and not rely on automatic
1049closing.
1050
1051Returns true on success, otherwise 0.
1052
1053If the C<AutoClose> option has been enabled when the IO::Uncompress::Gunzip
1054object was created, and the object is associated with a file, the
1055underlying file will also be closed.
1056
1057
1058
1059
e7d45986
PM
1060=head2 nextStream
1061
1062Usage is
1063
1064 my $status = $z->nextStream();
1065
1066Skips to the next compressed data stream in the input file/buffer. If a new
1067compressed data stream is found, the eof marker will be cleared, C<$.> will
1068be reset to 0.
1069
1070Returns 1 if a new stream was found, 0 if none was found, and -1 if an
1071error was encountered.
1072
1073=head2 trailingData
1074
1075Usage is
1076
1077 my $data = $z->trailingData();
1078
1079Returns any data that is present immediately after the end of the compressed data stream.
1080
642e522c
RGS
1081=head1 Importing
1082
1083No symbolic constants are required by IO::Uncompress::Gunzip at present.
1084
1085=over 5
1086
1087=item :all
1088
1089Imports C<gunzip> and C<$GunzipError>.
1090Same as doing this
1091
1092 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;
1093
1094=back
1095
1096=head1 EXAMPLES
1097
1098
1099
1100
1101=head1 SEE ALSO
1102
a02d0f6f 1103L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
642e522c
RGS
1104
1105L<Compress::Zlib::FAQ|Compress::Zlib::FAQ>
1106
a02d0f6f
RGS
1107L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1108L<Archive::Tar|Archive::Tar>,
642e522c
RGS
1109L<IO::Zlib|IO::Zlib>
1110
a02d0f6f 1111
642e522c
RGS
1112For RFC 1950, 1951 and 1952 see
1113F<http://www.faqs.org/rfcs/rfc1950.html>,
1114F<http://www.faqs.org/rfcs/rfc1951.html> and
1115F<http://www.faqs.org/rfcs/rfc1952.html>
1116
a02d0f6f
RGS
1117The I<zlib> compression library was written by Jean-loup Gailly
1118F<gzip@prep.ai.mit.edu> and Mark Adler F<madler@alumni.caltech.edu>.
1119
1120The primary site for the I<zlib> compression library is
1121F<http://www.zlib.org>.
1122
1123The primary site for gzip is F<http://www.gzip.org>.
1124
1125
1126
1127
642e522c
RGS
1128=head1 AUTHOR
1129
cb7abd7f 1130This module was written by Paul Marquess, F<pmqs@cpan.org>.
642e522c 1131
642e522c 1132
642e522c
RGS
1133
1134=head1 MODIFICATION HISTORY
1135
1136See the Changes file.
1137
1138=head1 COPYRIGHT AND LICENSE
642e522c 1139
1a6a8453 1140Copyright (c) 2005-2006 Paul Marquess. All rights reserved.
a02d0f6f 1141
642e522c
RGS
1142This program is free software; you can redistribute it and/or
1143modify it under the same terms as Perl itself.
1144