Commit | Line | Data |
---|---|---|
8d0ec8cf RGS |
1 | # IO::Zlib.pm |
2 | # | |
3 | # Copyright (c) 1998-2004 Tom Hughes <tom@compton.nu>. | |
4 | # All rights reserved. This program is free software; you can redistribute | |
5 | # it and/or modify it under the same terms as Perl itself. | |
6 | ||
7 | package IO::Zlib; | |
8 | ||
ee60cd3f | 9 | $VERSION = "1.04_02"; |
8d0ec8cf RGS |
10 | |
11 | =head1 NAME | |
12 | ||
13 | IO::Zlib - IO:: style interface to L<Compress::Zlib> | |
14 | ||
15 | =head1 SYNOPSIS | |
16 | ||
17 | With any version of Perl 5 you can use the basic OO interface: | |
18 | ||
19 | use IO::Zlib; | |
20 | ||
21 | $fh = new IO::Zlib; | |
22 | if ($fh->open("file.gz", "rb")) { | |
23 | print <$fh>; | |
24 | $fh->close; | |
25 | } | |
26 | ||
27 | $fh = IO::Zlib->new("file.gz", "wb9"); | |
28 | if (defined $fh) { | |
29 | print $fh "bar\n"; | |
30 | $fh->close; | |
31 | } | |
32 | ||
33 | $fh = IO::Zlib->new("file.gz", "rb"); | |
34 | if (defined $fh) { | |
35 | print <$fh>; | |
36 | undef $fh; # automatically closes the file | |
37 | } | |
38 | ||
39 | With Perl 5.004 you can also use the TIEHANDLE interface to access | |
40 | compressed files just like ordinary files: | |
41 | ||
42 | use IO::Zlib; | |
43 | ||
44 | tie *FILE, 'IO::Zlib', "file.gz", "wb"; | |
45 | print FILE "line 1\nline2\n"; | |
46 | ||
47 | tie *FILE, 'IO::Zlib', "file.gz", "rb"; | |
48 | while (<FILE>) { print "LINE: ", $_ }; | |
49 | ||
50 | =head1 DESCRIPTION | |
51 | ||
52 | C<IO::Zlib> provides an IO:: style interface to L<Compress::Zlib> and | |
53 | hence to gzip/zlib compressed files. It provides many of the same methods | |
54 | as the L<IO::Handle> interface. | |
55 | ||
56 | Starting from IO::Zlib version 1.02, IO::Zlib can also use an | |
57 | external F<gzip> command. The default behaviour is to try to use | |
58 | an external F<gzip> if no C<Compress::Zlib> can be loaded, unless | |
59 | explicitly disabled by | |
60 | ||
61 | use IO::Zlib qw(:gzip_external 0); | |
62 | ||
63 | If explicitly enabled by | |
64 | ||
65 | use IO::Zlib qw(:gzip_external 1); | |
66 | ||
67 | then the external F<gzip> is used B<instead> of C<Compress::Zlib>. | |
68 | ||
69 | =head1 CONSTRUCTOR | |
70 | ||
71 | =over 4 | |
72 | ||
73 | =item new ( [ARGS] ) | |
74 | ||
75 | Creates an C<IO::Zlib> object. If it receives any parameters, they are | |
76 | passed to the method C<open>; if the open fails, the object is destroyed. | |
77 | Otherwise, it is returned to the caller. | |
78 | ||
79 | =back | |
80 | ||
81 | =head1 OBJECT METHODS | |
82 | ||
83 | =over 4 | |
84 | ||
85 | =item open ( FILENAME, MODE ) | |
86 | ||
87 | C<open> takes two arguments. The first is the name of the file to open | |
88 | and the second is the open mode. The mode can be anything acceptable to | |
89 | L<Compress::Zlib> and by extension anything acceptable to I<zlib> (that | |
90 | basically means POSIX fopen() style mode strings plus an optional number | |
91 | to indicate the compression level). | |
92 | ||
93 | =item opened | |
94 | ||
95 | Returns true if the object currently refers to a opened file. | |
96 | ||
97 | =item close | |
98 | ||
99 | Close the file associated with the object and disassociate | |
100 | the file from the handle. | |
101 | Done automatically on destroy. | |
102 | ||
103 | =item getc | |
104 | ||
105 | Return the next character from the file, or undef if none remain. | |
106 | ||
107 | =item getline | |
108 | ||
109 | Return the next line from the file, or undef on end of string. | |
110 | Can safely be called in an array context. | |
111 | Currently ignores $/ ($INPUT_RECORD_SEPARATOR or $RS when L<English> | |
112 | is in use) and treats lines as delimited by "\n". | |
113 | ||
114 | =item getlines | |
115 | ||
116 | Get all remaining lines from the file. | |
117 | It will croak() if accidentally called in a scalar context. | |
118 | ||
119 | =item print ( ARGS... ) | |
120 | ||
121 | Print ARGS to the file. | |
122 | ||
123 | =item read ( BUF, NBYTES, [OFFSET] ) | |
124 | ||
125 | Read some bytes from the file. | |
126 | Returns the number of bytes actually read, 0 on end-of-file, undef on error. | |
127 | ||
128 | =item eof | |
129 | ||
130 | Returns true if the handle is currently positioned at end of file? | |
131 | ||
132 | =item seek ( OFFSET, WHENCE ) | |
133 | ||
134 | Seek to a given position in the stream. | |
135 | Not yet supported. | |
136 | ||
137 | =item tell | |
138 | ||
139 | Return the current position in the stream, as a numeric offset. | |
140 | Not yet supported. | |
141 | ||
142 | =item setpos ( POS ) | |
143 | ||
144 | Set the current position, using the opaque value returned by C<getpos()>. | |
145 | Not yet supported. | |
146 | ||
147 | =item getpos ( POS ) | |
148 | ||
149 | Return the current position in the string, as an opaque object. | |
150 | Not yet supported. | |
151 | ||
152 | =back | |
153 | ||
154 | =head1 USING THE EXTERNAL GZIP | |
155 | ||
156 | If the external F<gzip> is used, the following C<open>s are used: | |
157 | ||
158 | open(FH, "gzip -dc $filename |") # for read opens | |
159 | open(FH, " | gzip > $filename") # for write opens | |
160 | ||
161 | You can modify the 'commands' for example to hardwire | |
162 | an absolute path by e.g. | |
163 | ||
164 | use IO::Zlib ':gzip_read_open' => '/some/where/gunzip -c %s |'; | |
165 | use IO::Zlib ':gzip_write_open' => '| /some/where/gzip.exe > %s'; | |
166 | ||
167 | The C<%s> is expanded to be the filename (C<sprintf> is used, so be | |
168 | careful to escape any other C<%> signs). The 'commands' are checked | |
169 | for sanity - they must contain the C<%s>, and the read open must end | |
170 | with the pipe sign, and the write open must begin with the pipe sign. | |
171 | ||
172 | =head1 CLASS METHODS | |
173 | ||
174 | =over 4 | |
175 | ||
176 | =item has_Compress_Zlib | |
177 | ||
178 | Returns true if C<Compress::Zlib> is available. Note that this does | |
179 | not mean that C<Compress::Zlib> is being used: see L</gzip_external> | |
180 | and L<gzip_used>. | |
181 | ||
182 | =item gzip_external | |
183 | ||
184 | Undef if an external F<gzip> B<can> be used if C<Compress::Zlib> is | |
185 | not available (see L</has_Compress_Zlib>), true if an external F<gzip> | |
186 | is explicitly used, false if an external F<gzip> must not be used. | |
187 | See L</gzip_used>. | |
188 | ||
189 | =item gzip_used | |
190 | ||
191 | True if an external F<gzip> is being used, false if not. | |
192 | ||
193 | =item gzip_read_open | |
194 | ||
195 | Return the 'command' being used for opening a file for reading using an | |
196 | external F<gzip>. | |
197 | ||
198 | =item gzip_write_open | |
199 | ||
200 | Return the 'command' being used for opening a file for writing using an | |
201 | external F<gzip>. | |
202 | ||
203 | =back | |
204 | ||
205 | =head1 DIAGNOSTICS | |
206 | ||
207 | =over 4 | |
208 | ||
209 | =item IO::Zlib::getlines: must be called in list context | |
210 | ||
211 | If you want read lines, you must read in list context. | |
212 | ||
213 | =item IO::Zlib::gzopen_external: mode '...' is illegal | |
214 | ||
215 | Use only modes 'rb' or 'wb' or /wb[1-9]/. | |
216 | ||
217 | =item IO::Zlib::import: '...' is illegal | |
218 | ||
219 | The known import symbols are the C<:gzip_external>, C<:gzip_read_open>, | |
220 | and C<:gzip_write_open>. Anything else is not recognized. | |
221 | ||
222 | =item IO::Zlib::import: ':gzip_external' requires an argument | |
223 | ||
224 | The C<:gzip_external> requires one boolean argument. | |
225 | ||
226 | =item IO::Zlib::import: 'gzip_read_open' requires an argument | |
227 | ||
228 | The C<:gzip_external> requires one string argument. | |
229 | ||
230 | =item IO::Zlib::import: 'gzip_read' '...' is illegal | |
231 | ||
232 | The C<:gzip_read_open> argument must end with the pipe sign (|) | |
233 | and have the C<%s> for the filename. See L</"USING THE EXTERNAL GZIP">. | |
234 | ||
235 | =item IO::Zlib::import: 'gzip_write_open' requires an argument | |
236 | ||
237 | The C<:gzip_external> requires one string argument. | |
238 | ||
239 | =item IO::Zlib::import: 'gzip_write_open' '...' is illegal | |
240 | ||
241 | The C<:gzip_write_open> argument must begin with the pipe sign (|) | |
242 | and have the C<%s> for the filename. An output redirect (>) is also | |
243 | often a good idea, depending on your operating system shell syntax. | |
244 | See L</"USING THE EXTERNAL GZIP">. | |
245 | ||
246 | =item IO::Zlib::import: no Compress::Zlib and no external gzip | |
247 | ||
248 | Given that we failed to load C<Compress::Zlib> and that the use of | |
249 | an external F<gzip> was disabled, IO::Zlib has not much chance of working. | |
250 | ||
251 | =item IO::Zlib::open: needs a filename | |
252 | ||
253 | No filename, no open. | |
254 | ||
255 | =item IO::Zlib::READ: NBYTES must be specified | |
256 | ||
257 | We must know how much to read. | |
258 | ||
259 | =item IO::Zlib::READ: OFFSET is not supported | |
260 | ||
261 | Offsets of gzipped streams are not supported. | |
262 | ||
263 | =item IO::Zlib::WRITE: too long LENGTH | |
264 | ||
265 | The LENGTH must be less than or equal to the buffer size. | |
266 | ||
267 | =item IO::Zlib::WRITE: OFFSET is not supported | |
268 | ||
269 | Offsets of gzipped streams are not supported. | |
270 | ||
271 | =back | |
272 | ||
273 | =head1 SEE ALSO | |
274 | ||
275 | L<perlfunc>, | |
276 | L<perlop/"I/O Operators">, | |
277 | L<IO::Handle>, | |
278 | L<Compress::Zlib> | |
279 | ||
280 | =head1 HISTORY | |
281 | ||
282 | Created by Tom Hughes E<lt>F<tom@compton.nu>E<gt>. | |
283 | ||
284 | Support for external gzip added by Jarkko Hietaniemi E<lt>F<jhi@iki.fi>E<gt>. | |
285 | ||
286 | =head1 COPYRIGHT | |
287 | ||
288 | Copyright (c) 1998-2004 Tom Hughes E<lt>F<tom@compton.nu>E<gt>. | |
289 | All rights reserved. This program is free software; you can redistribute | |
290 | it and/or modify it under the same terms as Perl itself. | |
291 | ||
292 | =cut | |
293 | ||
294 | require 5.004; | |
295 | ||
296 | use strict; | |
297 | use vars qw($VERSION $AUTOLOAD @ISA); | |
298 | ||
299 | use Carp; | |
300 | use Fcntl qw(SEEK_SET); | |
301 | ||
302 | my $has_Compress_Zlib; | |
303 | my $aliased; | |
304 | ||
305 | sub has_Compress_Zlib { | |
306 | $has_Compress_Zlib; | |
307 | } | |
308 | ||
309 | BEGIN { | |
310 | eval { require Compress::Zlib }; | |
311 | $has_Compress_Zlib = $@ ? 0 : 1; | |
312 | } | |
313 | ||
314 | use Symbol; | |
315 | use Tie::Handle; | |
316 | ||
317 | # These might use some $^O logic. | |
318 | my $gzip_read_open = "gzip -dc %s |"; | |
319 | my $gzip_write_open = "| gzip > %s"; | |
320 | ||
321 | my $gzip_external; | |
322 | my $gzip_used; | |
323 | ||
324 | sub gzip_read_open { | |
325 | $gzip_read_open; | |
326 | } | |
327 | ||
328 | sub gzip_write_open { | |
329 | $gzip_write_open; | |
330 | } | |
331 | ||
332 | sub gzip_external { | |
333 | $gzip_external; | |
334 | } | |
335 | ||
336 | sub gzip_used { | |
337 | $gzip_used; | |
338 | } | |
339 | ||
340 | sub can_gunzip { | |
341 | $has_Compress_Zlib || $gzip_external; | |
342 | } | |
343 | ||
344 | sub _import { | |
345 | my $import = shift; | |
346 | while (@_) { | |
347 | if ($_[0] eq ':gzip_external') { | |
348 | shift; | |
349 | if (@_) { | |
350 | $gzip_external = shift; | |
351 | } else { | |
352 | croak "$import: ':gzip_external' requires an argument"; | |
353 | } | |
354 | } | |
355 | elsif ($_[0] eq ':gzip_read_open') { | |
356 | shift; | |
357 | if (@_) { | |
358 | $gzip_read_open = shift; | |
359 | croak "$import: ':gzip_read_open' '$gzip_read_open' is illegal" | |
360 | unless $gzip_read_open =~ /^.+%s.+\|\s*$/; | |
361 | } else { | |
362 | croak "$import: ':gzip_read_open' requires an argument"; | |
363 | } | |
364 | } | |
365 | elsif ($_[0] eq ':gzip_write_open') { | |
366 | shift; | |
367 | if (@_) { | |
368 | $gzip_write_open = shift; | |
369 | croak "$import: ':gzip_write_open' '$gzip_read_open' is illegal" | |
370 | unless $gzip_write_open =~ /^\s*\|.+%s.*$/; | |
371 | } else { | |
372 | croak "$import: ':gzip_write_open' requires an argument"; | |
373 | } | |
374 | } | |
375 | else { | |
376 | last; | |
377 | } | |
378 | } | |
379 | return @_; | |
380 | } | |
381 | ||
382 | sub _alias { | |
383 | my $import = shift; | |
384 | if ((!$has_Compress_Zlib && !defined $gzip_external) || $gzip_external) { | |
385 | # The undef *gzopen is really needed only during | |
386 | # testing where we eval several 'use IO::Zlib's. | |
387 | undef *gzopen; | |
388 | *gzopen = \&gzopen_external; | |
389 | *IO::Handle::gzread = \&gzread_external; | |
390 | *IO::Handle::gzwrite = \&gzwrite_external; | |
391 | *IO::Handle::gzreadline = \&gzreadline_external; | |
392 | *IO::Handle::gzclose = \&gzclose_external; | |
393 | $gzip_used = 1; | |
394 | } else { | |
395 | croak "$import: no Compress::Zlib and no external gzip" | |
396 | unless $has_Compress_Zlib; | |
397 | *gzopen = \&Compress::Zlib::gzopen; | |
398 | *gzread = \&Compress::Zlib::gzread; | |
399 | *gzwrite = \&Compress::Zlib::gzwrite; | |
400 | *gzreadline = \&Compress::Zlib::gzreadline; | |
401 | } | |
402 | $aliased = 1; | |
403 | } | |
404 | ||
405 | sub import { | |
406 | shift; | |
407 | my $import = "IO::Zlib::import"; | |
408 | if (@_) { | |
409 | if (_import($import, @_)) { | |
410 | croak "$import: '@_' is illegal"; | |
411 | } | |
412 | } | |
413 | _alias($import); | |
414 | } | |
415 | ||
416 | @ISA = qw(Tie::Handle); | |
417 | ||
418 | sub TIEHANDLE | |
419 | { | |
420 | my $class = shift; | |
421 | my @args = @_; | |
422 | ||
423 | my $self = bless {}, $class; | |
424 | ||
425 | return @args ? $self->OPEN(@args) : $self; | |
426 | } | |
427 | ||
428 | sub DESTROY | |
429 | { | |
430 | } | |
431 | ||
432 | sub OPEN | |
433 | { | |
434 | my $self = shift; | |
435 | my $filename = shift; | |
436 | my $mode = shift; | |
437 | ||
438 | croak "IO::Zlib::open: needs a filename" unless defined($filename); | |
439 | ||
440 | $self->{'file'} = gzopen($filename,$mode); | |
441 | $self->{'eof'} = 0; | |
442 | ||
443 | return defined($self->{'file'}) ? $self : undef; | |
444 | } | |
445 | ||
446 | sub CLOSE | |
447 | { | |
448 | my $self = shift; | |
449 | ||
450 | return undef unless defined($self->{'file'}); | |
451 | ||
452 | my $status = $self->{'file'}->gzclose(); | |
453 | ||
454 | delete $self->{'file'}; | |
455 | delete $self->{'eof'}; | |
456 | ||
457 | return ($status == 0) ? 1 : undef; | |
458 | } | |
459 | ||
460 | sub READ | |
461 | { | |
462 | my $self = shift; | |
463 | my $bufref = \$_[0]; | |
464 | my $nbytes = $_[1]; | |
465 | my $offset = $_[2]; | |
466 | ||
467 | croak "IO::Zlib::READ: NBYTES must be specified" unless defined($nbytes); | |
468 | croak "IO::Zlib::READ: OFFSET is not supported" if defined($offset) && $offset != 0; | |
469 | ||
470 | return 0 if $self->{'eof'}; | |
471 | ||
472 | my $bytesread = $self->{'file'}->gzread($$bufref,$nbytes); | |
473 | ||
474 | return undef if $bytesread < 0; | |
475 | ||
476 | $self->{'eof'} = 1 if $bytesread < $nbytes; | |
477 | ||
478 | return $bytesread; | |
479 | } | |
480 | ||
481 | sub READLINE | |
482 | { | |
483 | my $self = shift; | |
484 | ||
485 | my $line; | |
486 | ||
487 | return () if $self->{'file'}->gzreadline($line) <= 0; | |
488 | ||
489 | return $line unless wantarray; | |
490 | ||
491 | my @lines = $line; | |
492 | ||
493 | while ($self->{'file'}->gzreadline($line) > 0) | |
494 | { | |
495 | push @lines, $line; | |
496 | } | |
497 | ||
498 | return @lines; | |
499 | } | |
500 | ||
501 | sub WRITE | |
502 | { | |
503 | my $self = shift; | |
504 | my $buf = shift; | |
505 | my $length = shift; | |
506 | my $offset = shift; | |
507 | ||
508 | croak "IO::Zlib::WRITE: too long LENGTH" unless $length <= length($buf); | |
509 | croak "IO::Zlib::WRITE: OFFSET not supported" if defined($offset) && $offset != 0; | |
510 | ||
511 | return $self->{'file'}->gzwrite(substr($buf,0,$length)); | |
512 | } | |
513 | ||
514 | sub EOF | |
515 | { | |
516 | my $self = shift; | |
517 | ||
518 | return $self->{'eof'}; | |
519 | } | |
520 | ||
521 | sub new | |
522 | { | |
523 | my $class = shift; | |
524 | my @args = @_; | |
525 | ||
526 | _alias("new", @_) unless $aliased; # Some call new IO::Zlib directly... | |
527 | ||
528 | my $self = gensym(); | |
529 | ||
530 | tie *{$self}, $class, @args; | |
531 | ||
532 | return tied(${$self}) ? bless $self, $class : undef; | |
533 | } | |
534 | ||
535 | sub getline | |
536 | { | |
537 | my $self = shift; | |
538 | ||
539 | return scalar tied(*{$self})->READLINE(); | |
540 | } | |
541 | ||
542 | sub getlines | |
543 | { | |
544 | my $self = shift; | |
545 | ||
546 | croak "IO::Zlib::getlines: must be called in list context" | |
547 | unless wantarray; | |
548 | ||
549 | return tied(*{$self})->READLINE(); | |
550 | } | |
551 | ||
552 | sub opened | |
553 | { | |
554 | my $self = shift; | |
555 | ||
556 | return defined tied(*{$self})->{'file'}; | |
557 | } | |
558 | ||
559 | sub AUTOLOAD | |
560 | { | |
561 | my $self = shift; | |
562 | ||
563 | $AUTOLOAD =~ s/.*:://; | |
564 | $AUTOLOAD =~ tr/a-z/A-Z/; | |
565 | ||
566 | return tied(*{$self})->$AUTOLOAD(@_); | |
567 | } | |
568 | ||
569 | sub gzopen_external { | |
570 | my ($filename, $mode) = @_; | |
571 | require IO::Handle; | |
572 | my $fh = IO::Handle->new(); | |
573 | if ($mode =~ /r/) { | |
574 | # Because someone will try to read ungzipped files | |
575 | # with this we peek and verify the signature. Yes, | |
576 | # this means that we open the file twice (if it is | |
577 | # gzipped). | |
578 | # Plenty of race conditions exist in this code, but | |
579 | # the alternative would be to capture the stderr of | |
580 | # gzip and parse it, which would be a portability nightmare. | |
581 | if (-e $filename && open($fh, $filename)) { | |
582 | binmode $fh; | |
583 | my $sig; | |
584 | my $rdb = read($fh, $sig, 2); | |
585 | if ($rdb == 2 && $sig eq "\x1F\x8B") { | |
586 | my $ropen = sprintf $gzip_read_open, $filename; | |
587 | if (open($fh, $ropen)) { | |
588 | binmode $fh; | |
589 | return $fh; | |
590 | } else { | |
591 | return undef; | |
592 | } | |
593 | } | |
594 | seek($fh, 0, SEEK_SET) or | |
595 | die "IO::Zlib: open('$filename', 'r'): seek: $!"; | |
596 | return $fh; | |
597 | } else { | |
598 | return undef; | |
599 | } | |
600 | } elsif ($mode =~ /w/) { | |
601 | my $level = ''; | |
602 | $level = "-$1" if $mode =~ /([1-9])/; | |
603 | # To maximize portability we would need to open | |
604 | # two filehandles here, one for "| gzip $level" | |
605 | # and another for "> $filename", and then when | |
606 | # writing copy bytes from the first to the second. | |
607 | # We are using IO::Handle objects for now, however, | |
608 | # and they can only contain one stream at a time. | |
609 | my $wopen = sprintf $gzip_write_open, $filename; | |
610 | if (open($fh, $wopen)) { | |
611 | $fh->autoflush(1); | |
612 | binmode $fh; | |
613 | return $fh; | |
614 | } else { | |
615 | return undef; | |
616 | } | |
617 | } else { | |
618 | croak "IO::Zlib::gzopen_external: mode '$mode' is illegal"; | |
619 | } | |
620 | return undef; | |
621 | } | |
622 | ||
623 | sub gzread_external { | |
624 | # Use read() instead of syswrite() because people may | |
625 | # mix reads and readlines, and we don't want to mess | |
626 | # the stdio buffering. See also gzreadline_external() | |
627 | # and gzwrite_external(). | |
628 | my $nread = read($_[0], $_[1], @_ == 3 ? $_[2] : 4096); | |
629 | defined $nread ? $nread : -1; | |
630 | } | |
631 | ||
632 | sub gzwrite_external { | |
633 | # Using syswrite() is okay (cf. gzread_external()) | |
634 | # since the bytes leave this process and buffering | |
635 | # is therefore not an issue. | |
636 | my $nwrote = syswrite($_[0], $_[1]); | |
637 | defined $nwrote ? $nwrote : -1; | |
638 | } | |
639 | ||
640 | sub gzreadline_external { | |
641 | # See the comment in gzread_external(). | |
642 | $_[1] = readline($_[0]); | |
643 | return defined $_[1] ? length($_[1]) : -1; | |
644 | } | |
645 | ||
646 | sub gzclose_external { | |
647 | close($_[0]); | |
648 | # I am not entirely certain why this is needed but it seems | |
649 | # the above close() always fails (as if the stream would have | |
650 | # been already closed - something to do with using external | |
651 | # processes via pipes?) | |
652 | return 0; | |
653 | } | |
654 | ||
655 | 1; |