This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update Changes.
[perl5.git] / lib / Tie / File.pm
CommitLineData
b5aed31e
AMS
1
2package Tie::File;
3use Carp;
4use POSIX 'SEEK_SET';
51efdd02 5use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX';
b5aed31e
AMS
6require 5.005;
7
836d9961 8$VERSION = "0.17";
b5aed31e
AMS
9
10# Idea: The object will always contain an array of byte offsets
11# this will be filled in as is necessary and convenient.
12# fetch will do seek-read.
13# There will be a cache parameter that controls the amount of cached *data*
14# Also an LRU queue of cached records
15# store will read the relevant record into the cache
16# If it's the same length as what is being written, it will overwrite it in
17# place; if not, it will do a from-to copying write.
18# The record separator string is also a parameter
19
20# Record numbers start at ZERO.
21
22my $DEFAULT_CACHE_SIZE = 1<<21; # 2 megabytes
23
24sub TIEARRAY {
25 if (@_ % 2 != 0) {
26 croak "usage: tie \@array, $_[0], filename, [option => value]...";
27 }
28 my ($pack, $file, %opts) = @_;
29
30 # transform '-foo' keys into 'foo' keys
31 for my $key (keys %opts) {
32 my $okey = $key;
33 if ($key =~ s/^-+//) {
34 $opts{$key} = delete $opts{$okey};
35 }
36 }
37
38 $opts{cachesize} ||= $DEFAULT_CACHE_SIZE;
39
40 # the cache is a hash instead of an array because it is likely to be
41 # sparsely populated
42 $opts{cache} = {};
43 $opts{cached} = 0; # total size of cached data
44 $opts{lru} = []; # replace with heap in later version
45
46 $opts{offsets} = [0];
47 $opts{filename} = $file;
48 $opts{recsep} = $/ unless defined $opts{recsep};
49 $opts{recseplen} = length($opts{recsep});
50 if ($opts{recseplen} == 0) {
51 croak "Empty record separator not supported by $pack";
52 }
53
54 my $mode = defined($opts{mode}) ? $opts{mode} : O_CREAT|O_RDWR;
fa408a35 55 my $fh;
b5aed31e 56
fa408a35
AMS
57 if (UNIVERSAL::isa($file, 'GLOB')) {
58 unless (seek $file, 0, SEEK_SET) {
59 croak "$pack: your filehandle does not appear to be seekable";
60 }
61 $fh = $file;
62 } elsif (ref $file) {
63 croak "usage: tie \@array, $pack, filename, [option => value]...";
64 } else {
65 $fh = \do { local *FH }; # only works in 5.005 and later
66 sysopen $fh, $file, $mode, 0666 or return;
67 binmode $fh;
68 }
b5aed31e
AMS
69 { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
70 $opts{fh} = $fh;
71
72 bless \%opts => $pack;
73}
74
75sub FETCH {
76 my ($self, $n) = @_;
77
78 # check the record cache
79 { my $cached = $self->_check_cache($n);
80 return $cached if defined $cached;
81 }
82
83 unless ($#{$self->{offsets}} >= $n) {
84 my $o = $self->_fill_offsets_to($n);
85 # If it's still undefined, there is no such record, so return 'undef'
86 return unless defined $o;
87 }
88
89 my $fh = $self->{FH};
90 $self->_seek($n); # we can do this now that offsets is populated
91 my $rec = $self->_read_record;
92 $self->_cache_insert($n, $rec) if defined $rec;
93 $rec;
94}
95
96sub STORE {
97 my ($self, $n, $rec) = @_;
98
99 $self->_fixrecs($rec);
100
101 # TODO: what should we do about the cache? Install the new record
102 # in the cache only if the old version of the same record was
103 # already there?
104
105 # We need this to decide whether the new record will fit
106 # It incidentally populates the offsets table
107 # Note we have to do this before we alter the cache
108 my $oldrec = $self->FETCH($n);
109
110 # _check_cache promotes record $n to MRU. Is this correct behavior?
fa408a35
AMS
111 if (my $cached = $self->_check_cache($n)) {
112 $self->{cache}{$n} = $rec;
113 $self->{cached} += length($rec) - length($cached);
114 }
b5aed31e
AMS
115
116 if (not defined $oldrec) {
117 # We're storing a record beyond the end of the file
51efdd02 118 $self->_extend_file_to($n+1);
b5aed31e
AMS
119 $oldrec = $self->{recsep};
120 }
121 my $len_diff = length($rec) - length($oldrec);
122
123 $self->_twrite($rec, $self->{offsets}[$n], length($oldrec));
124
125 # now update the offsets
126 # array slice goes from element $n+1 (the first one to move)
127 # to the end
128 for (@{$self->{offsets}}[$n+1 .. $#{$self->{offsets}}]) {
129 $_ += $len_diff;
130 }
131}
132
133sub FETCHSIZE {
134 my $self = shift;
135 my $n = $#{$self->{offsets}};
136 while (defined ($self->_fill_offsets_to($n+1))) {
137 ++$n;
138 }
139 $n;
140}
141
142sub STORESIZE {
143 my ($self, $len) = @_;
144 my $olen = $self->FETCHSIZE;
145 return if $len == $olen; # Woo-hoo!
146
147 # file gets longer
148 if ($len > $olen) {
51efdd02 149 $self->_extend_file_to($len);
b5aed31e
AMS
150 return;
151 }
152
153 # file gets shorter
154 $self->_seek($len);
155 $self->_chop_file;
836d9961
JH
156 $#{$self->{offsets}} = $len;
157 my @cached = grep $_ >= $len, keys %{$self->{cache}};
158 $self->_uncache(@cached);
b5aed31e
AMS
159}
160
51efdd02
AMS
161sub PUSH {
162 my $self = shift;
163 $self->SPLICE($self->FETCHSIZE, scalar(@_), @_);
164 $self->FETCHSIZE;
165}
166
167sub POP {
168 my $self = shift;
7b6b3db1
JH
169 my $size = $self->FETCHSIZE;
170 return if $size == 0;
171# print STDERR "# POPPITY POP POP POP\n";
172 scalar $self->SPLICE($size-1, 1);
51efdd02
AMS
173}
174
175sub SHIFT {
176 my $self = shift;
177 scalar $self->SPLICE(0, 1);
178}
179
180sub UNSHIFT {
181 my $self = shift;
182 $self->SPLICE(0, 0, @_);
183 $self->FETCHSIZE;
184}
185
186sub CLEAR {
187 # And enable auto-defer mode, since it's likely that they just
188 # did @a = (...);
189 my $self = shift;
190 $self->_seekb(0);
191 $self->_chop_file;
192 %{$self->{cache}} = ();
193 $self->{cached} = 0;
194 @{$self->{lru}} = ();
195 @{$self->{offsets}} = (0);
196}
197
198sub EXTEND {
199 my ($self, $n) = @_;
200 $self->_fill_offsets_to($n);
201 $self->_extend_file_to($n);
202}
203
204sub DELETE {
205 my ($self, $n) = @_;
206 my $lastrec = $self->FETCHSIZE-1;
207 if ($n == $lastrec) {
208 $self->_seek($n);
209 $self->_chop_file;
fa408a35 210 $#{$self->{offsets}}--;
836d9961 211 $self->_uncache($n);
51efdd02
AMS
212 # perhaps in this case I should also remove trailing null records?
213 } else {
214 $self->STORE($n, "");
215 }
216}
217
218sub EXISTS {
219 my ($self, $n) = @_;
220 $self->_fill_offsets_to($n);
221 0 <= $n && $n < $self->FETCHSIZE;
222}
223
b5aed31e
AMS
224sub SPLICE {
225 my ($self, $pos, $nrecs, @data) = @_;
226 my @result;
227
7b6b3db1
JH
228 $pos = 0 unless defined $pos;
229
230 # Deal with negative and other out-of-range positions
231 # Also set default for $nrecs
51efdd02
AMS
232 {
233 my $oldsize = $self->FETCHSIZE;
7b6b3db1 234 $nrecs = $oldsize unless defined $nrecs;
51efdd02
AMS
235 my $oldpos = $pos;
236
237 if ($pos < 0) {
238 $pos += $oldsize;
239 if ($pos < 0) {
240 croak "Modification of non-creatable array value attempted, subscript $oldpos";
241 }
242 }
243
244 if ($pos > $oldsize) {
245 return unless @data;
246 $pos = $oldsize; # This is what perl does for normal arrays
247 }
248 }
b5aed31e
AMS
249
250 $self->_fixrecs(@data);
251 my $data = join '', @data;
252 my $datalen = length $data;
253 my $oldlen = 0;
254
255 # compute length of data being removed
51efdd02 256 # Incidentally fills offsets table
b5aed31e
AMS
257 for ($pos .. $pos+$nrecs-1) {
258 my $rec = $self->FETCH($_);
259 last unless defined $rec;
260 push @result, $rec;
261 $oldlen += length($rec);
262 }
263
51efdd02 264 # Modify the file
b5aed31e
AMS
265 $self->_twrite($data, $self->{offsets}[$pos], $oldlen);
266
267 # update the offsets table part 1
268 # compute the offsets of the new records:
269 my @new_offsets;
270 if (@data) {
271 push @new_offsets, $self->{offsets}[$pos];
272 for (0 .. $#data-1) {
273 push @new_offsets, $new_offsets[-1] + length($data[$_]);
274 }
275 }
276 splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets);
277
278 # update the offsets table part 2
279 # adjust the offsets of the following old records
280 for ($pos+@data .. $#{$self->{offsets}}) {
281 $self->{offsets}[$_] += $datalen - $oldlen;
282 }
283 # If we scrubbed out all known offsets, regenerate the trivial table
284 # that knows that the file does indeed start at 0.
285 $self->{offsets}[0] = 0 unless @{$self->{offsets}};
286
51efdd02
AMS
287 # Perhaps the following cache foolery could be factored out
288 # into a bunch of mor opaque cache functions. For example,
289 # it's odd to delete a record from the cache and then remove
290 # it from the LRU queue later on; there should be a function to
291 # do both at once.
292
b5aed31e
AMS
293 # update the read cache, part 1
294 # modified records
295 # Consider this carefully for correctness
296 for ($pos .. $pos+$nrecs-1) {
297 my $cached = $self->{cache}{$_};
298 next unless defined $cached;
299 my $new = $data[$_-$pos];
300 if (defined $new) {
301 $self->{cached} += length($new) - length($cached);
302 $self->{cache}{$_} = $new;
303 } else {
836d9961 304 $self->_uncache($_);
b5aed31e
AMS
305 }
306 }
307 # update the read cache, part 2
308 # moved records - records past the site of the change
309 # need to be renumbered
310 # Maybe merge this with the previous block?
311 for (keys %{$self->{cache}}) {
312 next unless $_ >= $pos + $nrecs;
313 $self->{cache}{$_-$nrecs+@data} = delete $self->{cache}{$_};
314 }
315
316 # fix the LRU queue
317 my(@new, @changed);
318 for (@{$self->{lru}}) {
319 if ($_ >= $pos + $nrecs) {
320 push @new, $_ + @data - $nrecs;
321 } elsif ($_ >= $pos) {
322 push @changed, $_ if $_ < $pos + @data;
323 } else {
324 push @new, $_;
325 }
326 }
327 @{$self->{lru}} = (@new, @changed);
328
51efdd02
AMS
329 # Yes, the return value of 'splice' *is* actually this complicated
330 wantarray ? @result : @result ? $result[-1] : undef;
b5aed31e
AMS
331}
332
333# write data into the file
334# $data is the data to be written.
335# it should be written at position $pos, and should overwrite
336# exactly $len of the following bytes.
337# Note that if length($data) > $len, the subsequent bytes will have to
338# be moved up, and if length($data) < $len, they will have to
339# be moved down
340sub _twrite {
341 my ($self, $data, $pos, $len) = @_;
342
343 unless (defined $pos) {
344 die "\$pos was undefined in _twrite";
345 }
346
347 my $len_diff = length($data) - $len;
348
349 if ($len_diff == 0) { # Woo-hoo!
350 my $fh = $self->{fh};
351 $self->_seekb($pos);
352 $self->_write_record($data);
353 return; # well, that was easy.
354 }
355
356 # the two records are of different lengths
357 # our strategy here: rewrite the tail of the file,
358 # reading ahead one buffer at a time
359 # $bufsize is required to be at least as large as the data we're overwriting
360 my $bufsize = _bufsize($len_diff);
361 my ($writepos, $readpos) = ($pos, $pos+$len);
51efdd02 362 my $next_block;
b5aed31e
AMS
363
364 # Seems like there ought to be a way to avoid the repeated code
365 # and the special case here. The read(1) is also a little weird.
366 # Think about this.
367 do {
368 $self->_seekb($readpos);
51efdd02 369 my $br = read $self->{fh}, $next_block, $bufsize;
b5aed31e
AMS
370 my $more_data = read $self->{fh}, my($dummy), 1;
371 $self->_seekb($writepos);
372 $self->_write_record($data);
373 $readpos += $br;
374 $writepos += length $data;
375 $data = $next_block;
b5aed31e 376 } while $more_data;
51efdd02
AMS
377 $self->_seekb($writepos);
378 $self->_write_record($next_block);
b5aed31e
AMS
379
380 # There might be leftover data at the end of the file
381 $self->_chop_file if $len_diff < 0;
382}
383
384# If a record does not already end with the appropriate terminator
385# string, append one.
386sub _fixrecs {
387 my $self = shift;
388 for (@_) {
389 $_ .= $self->{recsep}
390 unless substr($_, - $self->{recseplen}) eq $self->{recsep};
391 }
392}
393
394# seek to the beginning of record #$n
395# Assumes that the offsets table is already correctly populated
396#
397# Note that $n=-1 has a special meaning here: It means the start of
398# the last known record; this may or may not be the very last record
399# in the file, depending on whether the offsets table is fully populated.
400#
401sub _seek {
402 my ($self, $n) = @_;
403 my $o = $self->{offsets}[$n];
404 defined($o)
405 or confess("logic error: undefined offset for record $n");
406 seek $self->{fh}, $o, SEEK_SET
407 or die "Couldn't seek filehandle: $!"; # "Should never happen."
408}
409
410sub _seekb {
411 my ($self, $b) = @_;
412 seek $self->{fh}, $b, SEEK_SET
413 or die "Couldn't seek filehandle: $!"; # "Should never happen."
414}
415
416# populate the offsets table up to the beginning of record $n
417# return the offset of record $n
418sub _fill_offsets_to {
419 my ($self, $n) = @_;
420 my $fh = $self->{fh};
421 local *OFF = $self->{offsets};
422 my $rec;
423
424 until ($#OFF >= $n) {
425 my $o = $OFF[-1];
426 $self->_seek(-1); # tricky -- see comment at _seek
427 $rec = $self->_read_record;
428 if (defined $rec) {
51efdd02 429 push @OFF, tell $fh;
b5aed31e
AMS
430 } else {
431 return; # It turns out there is no such record
432 }
433 }
434
435 # we have now read all the records up to record n-1,
436 # so we can return the offset of record n
437 return $OFF[$n];
438}
439
440# assumes that $rec is already suitably terminated
441sub _write_record {
442 my ($self, $rec) = @_;
443 my $fh = $self->{fh};
444 print $fh $rec
445 or die "Couldn't write record: $!"; # "Should never happen."
446
447}
448
449sub _read_record {
450 my $self = shift;
451 my $rec;
452 { local $/ = $self->{recsep};
453 my $fh = $self->{fh};
454 $rec = <$fh>;
455 }
456 $rec;
457}
458
459sub _cache_insert {
460 my ($self, $n, $rec) = @_;
461
462 # Do not cache records that are too big to fit in the cache.
463 return unless length $rec <= $self->{cachesize};
464
465 $self->{cache}{$n} = $rec;
466 $self->{cached} += length $rec;
467 push @{$self->{lru}}, $n; # most-recently-used is at the END
468
469 $self->_cache_flush if $self->{cached} > $self->{cachesize};
470}
471
836d9961
JH
472sub _uncache {
473 my $self = shift;
474 for my $n (@_) {
475 my $cached = delete $self->{cache}{$n};
476 next unless defined $cached;
477 @{$self->{lru}} = grep $_ != $n, @{$self->{lru}};
478 $self->{cached} -= length($cached);
479 }
480}
481
b5aed31e
AMS
482sub _check_cache {
483 my ($self, $n) = @_;
484 my $rec;
485 return unless defined($rec = $self->{cache}{$n});
486
487 # cache hit; update LRU queue and return $rec
488 # replace this with a heap in a later version
489 @{$self->{lru}} = ((grep $_ ne $n, @{$self->{lru}}), $n);
490 $rec;
491}
492
493sub _cache_flush {
494 my ($self) = @_;
495 while ($self->{cached} > $self->{cachesize}) {
496 my $lru = shift @{$self->{lru}};
497 $self->{cached} -= length $lru;
498 delete $self->{cache}{$lru};
499 }
500}
501
502# We have read to the end of the file and have the offsets table
503# entirely populated. Now we need to write a new record beyond
504# the end of the file. We prepare for this by writing
505# empty records into the file up to the position we want
51efdd02
AMS
506#
507# assumes that the offsets table already contains the offset of record $n,
508# if it exists, and extends to the end of the file if not.
b5aed31e
AMS
509sub _extend_file_to {
510 my ($self, $n) = @_;
511 $self->_seek(-1); # position after the end of the last record
512 my $pos = $self->{offsets}[-1];
513
514 # the offsets table has one entry more than the total number of records
51efdd02 515 $extras = $n - $#{$self->{offsets}};
b5aed31e
AMS
516
517 # Todo : just use $self->{recsep} x $extras here?
518 while ($extras-- > 0) {
519 $self->_write_record($self->{recsep});
fa408a35 520 push @{$self->{offsets}}, tell $self->{fh};
b5aed31e
AMS
521 }
522}
523
524# Truncate the file at the current position
525sub _chop_file {
526 my $self = shift;
527 truncate $self->{fh}, tell($self->{fh});
528}
529
530# compute the size of a buffer suitable for moving
531# all the data in a file forward $n bytes
532# ($n may be negative)
533# The result should be at least $n.
534sub _bufsize {
535 my $n = shift;
536 return 8192 if $n < 0;
537 my $b = $n & ~8191;
538 $b += 8192 if $n & 8191;
539 $b;
540}
541
51efdd02
AMS
542# Lock the file
543sub flock {
544 my ($self, $op) = @_;
545 unless (@_ <= 3) {
546 my $pack = ref $self;
547 croak "Usage: $pack\->flock([OPERATION])";
548 }
549 my $fh = $self->{fh};
550 $op = LOCK_EX unless defined $op;
551 flock $fh, $op;
552}
b5aed31e
AMS
553
554# Given a file, make sure the cache is consistent with the
555# file contents
556sub _check_integrity {
557 my ($self, $file, $warn) = @_;
558 my $good = 1;
fa408a35 559
836d9961
JH
560
561 if (not defined $self->{offsets}[0]) {
562 $warn && print STDERR "# offset 0 is missing!\n";
563 $good = 0;
564 } elsif ($self->{offsets}[0] != 0) {
565 $warn && print STDERR "# offset 0 is missing!\n";
b5aed31e
AMS
566 $warn && print STDERR "# rec 0: offset <$self->{offsets}[0]> s/b 0!\n";
567 $good = 0;
568 }
fa408a35
AMS
569
570 local *F = $self->{fh};
571 seek F, 0, SEEK_SET;
572 local $/ = $self->{recsep};
573 $. = 0;
574
b5aed31e
AMS
575 while (<F>) {
576 my $n = $. - 1;
577 my $cached = $self->{cache}{$n};
578 my $offset = $self->{offsets}[$.];
579 my $ao = tell F;
580 if (defined $offset && $offset != $ao) {
581 $warn && print STDERR "# rec $n: offset <$offset> actual <$ao>\n";
fa408a35 582 $good = 0;
b5aed31e
AMS
583 }
584 if (defined $cached && $_ ne $cached) {
585 $good = 0;
586 chomp $cached;
587 chomp;
588 $warn && print STDERR "# rec $n: cached <$cached> actual <$_>\n";
589 }
590 }
591
592 my $cachesize = 0;
593 while (my ($n, $r) = each %{$self->{cache}}) {
594 $cachesize += length($r);
595 next if $n+1 <= $.; # checked this already
596 $warn && print STDERR "# spurious caching of record $n\n";
597 $good = 0;
598 }
599 if ($cachesize != $self->{cached}) {
600 $warn && print STDERR "# cache size is $self->{cached}, should be $cachesize\n";
601 $good = 0;
602 }
603
604 my (%seen, @duplicate);
605 for (@{$self->{lru}}) {
606 $seen{$_}++;
607 if (not exists $self->{cache}{$_}) {
608 print "# $_ is mentioned in the LRU queue, but not in the cache\n";
609 $good = 0;
610 }
611 }
612 @duplicate = grep $seen{$_}>1, keys %seen;
613 if (@duplicate) {
614 my $records = @duplicate == 1 ? 'Record' : 'Records';
615 my $appear = @duplicate == 1 ? 'appears' : 'appear';
616 print "# $records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}\n";
617 $good = 0;
618 }
619 for (keys %{$self->{cache}}) {
620 unless (exists $seen{$_}) {
836d9961 621 print "# record $_ is in the cache but not the LRU queue\n";
b5aed31e
AMS
622 $good = 0;
623 }
624 }
625
626 $good;
627}
628
fa408a35
AMS
629"Cogito, ergo sum."; # don't forget to return a true value from the file
630
b5aed31e
AMS
631=head1 NAME
632
633Tie::File - Access the lines of a disk file via a Perl array
634
635=head1 SYNOPSIS
636
836d9961 637 # This file documents Tie::File version 0.17
b5aed31e
AMS
638
639 tie @array, 'Tie::File', filename or die ...;
640
641 $array[13] = 'blah'; # line 13 of the file is now 'blah'
642 print $array[42]; # display line 42 of the file
643
644 $n_recs = @array; # how many records are in the file?
645 $#array = $n_recs - 2; # chop records off the end
646
51efdd02
AMS
647 # As you would expect:
648
649 push @array, new recs...;
650 my $r1 = pop @array;
651 unshift @array, new recs...;
652 my $r1 = shift @array;
b5aed31e
AMS
653 @old_recs = splice @array, 3, 7, new recs...;
654
655 untie @array; # all finished
656
657=head1 DESCRIPTION
658
659C<Tie::File> represents a regular text file as a Perl array. Each
660element in the array corresponds to a record in the file. The first
661line of the file is element 0 of the array; the second line is element
6621, and so on.
663
664The file is I<not> loaded into memory, so this will work even for
665gigantic files.
666
667Changes to the array are reflected in the file immediately.
668
669=head2 C<recsep>
670
671What is a 'record'? By default, the meaning is the same as for the
672C<E<lt>...E<gt>> operator: It's a string terminated by C<$/>, which is
cc41e030
JH
673probably C<"\n">. You may change the definition of "record" by
674supplying the C<recsep> option in the C<tie> call:
675
b5aed31e
AMS
676
677 tie @array, 'Tie::File', $file, recsep => 'es';
678
679This says that records are delimited by the string C<es>. If the file contained the following data:
680
681 Curse these pesky flies!\n
682
683then the C<@array> would appear to have four elements:
684
685 "Curse thes"
686 "e pes"
687 "ky flies"
688 "!\n"
689
cc41e030
JH
690Windows users will probably want to use C<recsep =E<gt> "\r\n"> to get
691files terminated with the usual CRLF sequence.
692
b5aed31e
AMS
693An undefined value is not permitted as a record separator. Perl's
694special "paragraph mode" semantics (E<agrave> la C<$/ = "">) are not
695emulated.
696
697Records read from the tied array will have the record separator string
698on the end, just as if they were read from the C<E<lt>...E<gt>>
699operator. Records stored into the array will have the record
700separator string appended before they are written to the file, if they
701don't have one already. For example, if the record separator string
702is C<"\n">, then the following two lines do exactly the same thing:
703
704 $array[17] = "Cherry pie";
705 $array[17] = "Cherry pie\n";
706
707The result is that the contents of line 17 of the file will be
708replaced with "Cherry pie"; a newline character will separate line 17
7b6b3db1 709from line 18. This means that in particular, this will do nothing:
b5aed31e
AMS
710
711 chomp $array[17];
712
713Because the C<chomp>ed value will have the separator reattached when
714it is written back to the file. There is no way to create a file
715whose trailing record separator string is missing.
716
717Inserting records that I<contain> the record separator string will
718produce a reasonable result, but if you can't foresee what this result
719will be, you'd better avoid doing this.
720
721=head2 C<mode>
722
723Normally, the specified file will be opened for read and write access,
724and will be created if it does not exist. (That is, the flags
725C<O_RDWR | O_CREAT> are supplied in the C<open> call.) If you want to
726change this, you may supply alternative flags in the C<mode> option.
727See L<Fcntl> for a listing of available flags.
728For example:
729
730 # open the file if it exists, but fail if it does not exist
731 use Fcntl 'O_RDWR';
732 tie @array, 'Tie::File', $file, mode => O_RDWR;
733
734 # create the file if it does not exist
735 use Fcntl 'O_RDWR', 'O_CREAT';
736 tie @array, 'Tie::File', $file, mode => O_RDWR | O_CREAT;
737
738 # open an existing file in read-only mode
739 use Fcntl 'O_RDONLY';
740 tie @array, 'Tie::File', $file, mode => O_RDONLY;
741
742Opening the data file in write-only or append mode is not supported.
743
744=head2 C<cachesize>
745
746Records read in from the file are cached, to avoid having to re-read
747them repeatedly. If you read the same record twice, the first time it
748will be stored in memory, and the second time it will be fetched from
749memory.
750
751The cache has a bounded size; when it exceeds this size, the
752least-recently visited records will be purged from the cache. The
753default size is 2Mib. You can adjust the amount of space used for the
754cache by supplying the C<cachesize> option. The argument is the desired cache size, in bytes.
755
756 # I have a lot of memory, so use a large cache to speed up access
757 tie @array, 'Tie::File', $file, cachesize => 20_000_000;
758
759Setting the cache size to 0 will inhibit caching; records will be
760fetched from disk every time you examine them.
761
762=head2 Option Format
763
764C<-mode> is a synonym for C<mode>. C<-recsep> is a synonym for
765C<recsep>. C<-cachesize> is a synonym for C<cachesize>. You get the
766idea.
767
768=head1 Public Methods
769
770The C<tie> call returns an object, say C<$o>. You may call
771
772 $rec = $o->FETCH($n);
773 $o->STORE($n, $rec);
774
51efdd02
AMS
775to fetch or store the record at line C<$n>, respectively. The only other public method in this package is:
776
777=head2 C<flock>
778
779 $o->flock(MODE)
780
781will lock the tied file. C<MODE> has the same meaning as the second
782argument to the Perl built-in C<flock> function; for example
783C<LOCK_SH> or C<LOCK_EX | LOCK_NB>. (These constants are provided by
784the C<use Fcntl ':flock'> declaration.)
785
fa408a35 786C<MODE> is optional; C<$o-E<gt>flock> simply locks the file with
51efdd02
AMS
787C<LOCK_EX>.
788
789The best way to unlock a file is to discard the object and untie the
790array. It is probably unsafe to unlock the file without also untying
791it, because if you do, changes may remain unwritten inside the object.
792That is why there is no shortcut for unlocking. If you really want to
793unlock the file prematurely, you know what to do; if you don't know
794what to do, then don't do it.
795
796All the usual warnings about file locking apply here. In particular,
797note that file locking in Perl is B<advisory>, which means that
798holding a lock will not prevent anyone else from reading, writing, or
799erasing the file; it only prevents them from getting another lock at
800the same time. Locks are analogous to green traffic lights: If you
801have a green light, that does not prevent the idiot coming the other
802way from plowing into you sideways; it merely guarantees to you that
803the idiot does not also have a green light at the same time.
b5aed31e 804
fa408a35
AMS
805=head2 Tying to an already-opened filehandle
806
807If C<$fh> is a filehandle, such as is returned by C<IO::File> or one
808of the other C<IO> modules, you may use:
809
810 tie @array, 'Tie::File', $fh, ...;
811
812Similarly if you opened that handle C<FH> with regular C<open> or
813C<sysopen>, you may use:
814
815 tie @array, 'Tie::File', \*FH, ...;
816
817Handles that were opened write-only won't work. Handles that were
818opened read-only will work as long as you don't try to write to them.
819Handles must be attached to seekable sources of data---that means no
820pipes or sockets. If you try to supply a non-seekable handle, the
836d9961
JH
821C<tie> call will try to abort your program. This feature is not yet
822supported under VMS.
fa408a35 823
b5aed31e
AMS
824=head1 CAVEATS
825
826(That's Latin for 'warnings'.)
827
828=head2 Efficiency Note
829
830Every effort was made to make this module efficient. Nevertheless,
831changing the size of a record in the middle of a large file will
fa408a35 832always be slow, because everything after the new record must be moved.
b5aed31e
AMS
833
834In particular, note that:
835
836 # million-line file
837 for (@file_array) {
838 $_ .= 'x';
839 }
840
841is likely to be very slow, because the first iteration must relocate
842lines 1 through 999,999; the second iteration must relocate lines 2
843through 999,999, and so on. The relocation is done using block
844writes, however, so it's not as slow as it might be.
845
836d9961
JH
846A soon-to-be-released version of this module will provide a mechanism
847for getting better performance in such cases, by deferring the writing
848until it can be done all at once.
b5aed31e
AMS
849
850=head2 Efficiency Note 2
851
852Not every effort was made to make this module as efficient as
853possible. C<FETCHSIZE> should use binary search instead of linear
854search. The cache's LRU queue should be a heap instead of a list.
855These defects are probably minor; in any event, they will be fixed in
856a later version of the module.
857
858=head2 Efficiency Note 3
859
860The author has supposed that since this module is concerned with file
861I/O, almost all normal use of it will be heavily I/O bound, and that
862the time to maintain complicated data structures inside the module
863will be dominated by the time to actually perform the I/O. This
fa408a35 864suggests, for example, that an LRU read-cache is a good tradeoff,
b5aed31e
AMS
865even if it requires substantial adjustment following a C<splice>
866operation.
867
51efdd02
AMS
868=head1 CAVEATS
869
870(That's Latin for 'warnings'.)
871
872The behavior of tied arrays is not precisely the same as for regular
873arrays. For example:
b5aed31e 874
51efdd02
AMS
875 undef $a[10]; print "How unusual!\n" if $a[10];
876
877C<undef>-ing a C<Tie::File> array element just blanks out the
878corresponding record in the file. When you read it back again, you'll
879see the record separator (typically, $a[10] will appear to contain
880"\n") so the supposedly-C<undef>'ed value will be true.
881
882There are other minor differences, but in general, the correspondence
883is extremely close.
b5aed31e
AMS
884
885=head1 AUTHOR
886
887Mark Jason Dominus
888
889To contact the author, send email to: C<mjd-perl-tiefile+@plover.com>
890
891To receive an announcement whenever a new version of this module is
892released, send a blank email message to
893C<mjd-perl-tiefile-subscribe@plover.com>.
894
895=head1 LICENSE
896
836d9961 897C<Tie::File> version 0.17 is copyright (C) 2002 Mark Jason Dominus.
7b6b3db1
JH
898
899This library is free software; you may redistribute it and/or modify
900it under the same terms as Perl itself.
b5aed31e 901
7b6b3db1
JH
902These terms include your choice of (1) the Perl Artistic Licence, or
903(2) version 2 of the GNU General Public License as published by the
904Free Software Foundation, or (3) any later version of the GNU General
905Public License.
b5aed31e 906
7b6b3db1 907This library is distributed in the hope that it will be useful,
b5aed31e
AMS
908but WITHOUT ANY WARRANTY; without even the implied warranty of
909MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
910GNU General Public License for more details.
911
912You should have received a copy of the GNU General Public License
7b6b3db1
JH
913along with this library program; it should be in the file C<COPYING>.
914If not, write to the Free Software Foundation, Inc., 59 Temple Place,
915Suite 330, Boston, MA 02111 USA
b5aed31e
AMS
916
917For licensing inquiries, contact the author at:
918
919 Mark Jason Dominus
920 255 S. Warnock St.
921 Philadelphia, PA 19107
922
923=head1 WARRANTY
924
836d9961 925C<Tie::File> version 0.17 comes with ABSOLUTELY NO WARRANTY.
b5aed31e
AMS
926For details, see the license.
927
fa408a35
AMS
928=head1 THANKS
929
930Gigantic thanks to Jarkko Hietaniemi, for agreeing to put this in the
931core when I hadn't written it yet, and for generally being helpful,
932supportive, and competent. (Usually the rule is "choose any one.")
933Also big thanks to Abhijit Menon-Sen for all of the same things.
934
935Special thanks to Craig Berry (for VMS portability help), Randy Kobes
936(for Win32 portability help), the rest of the CPAN testers (for
937testing).
b5aed31e 938
fa408a35
AMS
939More thanks to:
940Gerrit Haase /
836d9961 941Nick Ing-Simmons /
fa408a35
AMS
942Tassilo von Parseval /
943H. Dieter Pearcey /
944Peter Somu /
945Tels
7b6b3db1 946
fa408a35
AMS
947=head1 TODO
948
949Test DELETE machinery more carefully.
b5aed31e 950
fa408a35
AMS
951More tests. (Configuration options, cache flushery. _twrite should
952be tested separately, because there are a lot of weird special cases
953lurking in there.)
b5aed31e
AMS
954
955More tests. (Stuff I didn't think of yet.)
956
b5aed31e
AMS
957Deferred writing. (!!!)
958
959Paragraph mode?
960
961More tests.
962
963Fixed-length mode.
964
fa408a35
AMS
965Maybe an autolocking mode?
966
b5aed31e
AMS
967=cut
968