This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Code cleanup based on turning off the -woffs in IRIX.
[perl5.git] / ext / DB_File / DB_File.pm
CommitLineData
a0d0e21e
LW
1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
6ca2e664 3# written by Paul Marquess (Paul.Marquess@btinternet.com)
c5da4faf
PM
4# last modified 26th April 2001
5# version 1.77
36477c24 6#
c5da4faf 7# Copyright (c) 1995-2001 Paul Marquess. All rights reserved.
36477c24 8# This program is free software; you can redistribute it and/or
9# modify it under the same terms as Perl itself.
10
8e07c86e
AD
11
12package DB_File::HASHINFO ;
785da04d 13
610ab055
PM
14require 5.003 ;
15
3245f058 16use warnings;
785da04d 17use strict;
8e07c86e 18use Carp;
88108326 19require Tie::Hash;
20@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 21
# Constructor for the *INFO classes.  Returns a reference to a hash that
# is both tied to and blessed into the invoking class, so that element
# access on the returned object is routed through that class's
# TIEHASH/FETCH/STORE/DELETE/EXISTS methods.
sub new
{
    my ($class) = @_ ;

    my %info ;
    tie %info, $class ;

    return bless \%info, $class ;
}
29
610ab055 30
# Build the object behind a tied HASHINFO hash.
#   VALID - set of element names that may be used as keys
#   GOT   - the values that have actually been stored so far
sub TIEHASH
{
    my ($class) = @_ ;

    my %valid = map { ($_ => 1) }
                    qw( bsize ffactor nelem cachesize hash lorder ) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
8e07c86e 41
610ab055 42
8e07c86e
AD
# Tied-hash read.  Returns the stored value for a permitted element name
# (undef if nothing has been stored yet); croaks for any other key.
sub FETCH
{
    my ($self, $key) = @_ ;

    unless ( exists $self->{VALID}{$key} ) {
        my $pkg = ref $self ;
        croak "${pkg}::FETCH - Unknown element '$key'" ;
    }

    return $self->{GOT}{$key} ;
}
53
54
# Tied-hash write.  Records the value for a permitted element name;
# croaks for any other key.
sub STORE
{
    my ($self, $key, $value) = @_ ;

    if ( ! exists $self->{VALID}{$key} ) {
        my $pkg = ref $self ;
        croak "${pkg}::STORE - Unknown element '$key'" ;
    }

    $self->{GOT}{$key} = $value ;
    return ;
}
70
# Tied-hash delete.  Forgets the stored value for a permitted element
# name; croaks for any other key.
#
# The error message names the object's actual class (as FETCH and STORE
# do), so a failure on a RECNOINFO or BTREEINFO object is no longer
# reported as coming from DB_File::HASHINFO.
sub DELETE
{
    my ($self, $key) = @_ ;

    if ( exists $self->{VALID}{$key} )
    {
        delete $self->{GOT}{$key} ;
        return ;
    }

    my $pkg = ref $self ;
    croak "${pkg}::DELETE - Unknown element '$key'" ;
}
85
# Tied-hash exists().  Reports whether the key is a permitted element
# name; it does not look at whether a value has been stored for it.
sub EXISTS
{
    my ($self, $key) = @_ ;

    return exists $self->{VALID}{$key} ;
}
93
# Shared failure path for tied-hash operations the *INFO classes do not
# implement: croaks, naming the object's class and the missing method.
sub NotHere
{
    my ($self, $method) = @_ ;
    my $class = ref $self ;

    croak "$class does not define the method ${method}" ;
}
101
18d2dc8c
PM
# Iterating over, or clearing, an *INFO hash is not supported; each of
# these tied-hash hooks simply reports itself through NotHere().
sub FIRSTKEY
{
    my ($self) = @_ ;
    return $self->NotHere("FIRSTKEY") ;
}

sub NEXTKEY
{
    my ($self) = @_ ;
    return $self->NotHere("NEXTKEY") ;
}

sub CLEAR
{
    my ($self) = @_ ;
    return $self->NotHere("CLEAR") ;
}
8e07c86e
AD
105
106package DB_File::RECNOINFO ;
785da04d 107
3245f058 108use warnings;
88108326 109use strict ;
110
045291aa 111@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e
AD
112
# Build the object behind a tied RECNOINFO hash.
#   VALID - set of element names that may be used as keys
#   GOT   - the values that have actually been stored so far
sub TIEHASH
{
    my ($class) = @_ ;

    my %valid = map { ($_ => 1) }
                    qw( bval cachesize psize flags lorder reclen bfname ) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
123
88108326 124package DB_File::BTREEINFO ;
8e07c86e 125
3245f058 126use warnings;
88108326 127use strict ;
8e07c86e 128
88108326 129@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 130
# Build the object behind a tied BTREEINFO hash.
#   VALID - set of element names that may be used as keys
#   GOT   - the values that have actually been stored so far
sub TIEHASH
{
    my ($class) = @_ ;

    my %valid = map { ($_ => 1) }
                    qw( flags cachesize maxkeypage minkeypage psize
                        compare prefix lorder ) ;

    return bless { VALID => \%valid, GOT => {} }, $class ;
}
142
143
8e07c86e 144package DB_File ;
785da04d 145
3245f058 146use warnings;
785da04d 147use strict;
b90e71be
GS
148use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO
149 $db_version $use_XSLoader
150 ) ;
8e07c86e
AD
151use Carp;
152
785da04d 153
c5da4faf 154$VERSION = "1.77" ;
8e07c86e
AD
155
156#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
88108326 157$DB_BTREE = new DB_File::BTREEINFO ;
158$DB_HASH = new DB_File::HASHINFO ;
159$DB_RECNO = new DB_File::RECNOINFO ;
8e07c86e 160
785da04d 161require Tie::Hash;
8e07c86e
AD
162require Exporter;
163use AutoLoader;
b90e71be
GS
BEGIN {
    # Use XSLoader when it can be loaded; otherwise fall back to
    # DynaLoader, which must then be inherited from.
    eval { require XSLoader } ;
    $use_XSLoader = $@ ? 0 : 1 ;

    unless ($use_XSLoader) {
        require DynaLoader;
        @ISA = qw(DynaLoader);
    }
}
174
175push @ISA, qw(Tie::Hash Exporter);
8e07c86e
AD
176@EXPORT = qw(
177 $DB_BTREE $DB_HASH $DB_RECNO
88108326 178
8e07c86e
AD
179 BTREEMAGIC
180 BTREEVERSION
181 DB_LOCK
182 DB_SHMEM
183 DB_TXN
184 HASHMAGIC
185 HASHVERSION
186 MAX_PAGE_NUMBER
187 MAX_PAGE_OFFSET
188 MAX_REC_NUMBER
189 RET_ERROR
190 RET_SPECIAL
191 RET_SUCCESS
192 R_CURSOR
193 R_DUP
194 R_FIRST
195 R_FIXEDLEN
196 R_IAFTER
197 R_IBEFORE
198 R_LAST
199 R_NEXT
200 R_NOKEY
201 R_NOOVERWRITE
202 R_PREV
203 R_RECNOSYNC
204 R_SETCURSOR
205 R_SNAPSHOT
206 __R_UNUSED
88108326 207
045291aa 208);
8e07c86e
AD
209
# Resolve an undefined sub name as a Berkeley DB constant.
#
# The unqualified name is passed to constant().  If that leaves $! set:
# an "invalid" errno hands the call on to AutoLoader, anything else
# croaks because the macro is not defined.  Otherwise a sub returning
# the constant's value is compiled under the requested name and control
# transfers to it.
sub AUTOLOAD {
    (my $constname = $AUTOLOAD) =~ s/.*:://;
    my $val = constant($constname, @_ ? $_[0] : 0);

    # $! must be inspected immediately after the constant() call.
    if ($! != 0) {
        unless ($! =~ /Invalid/ || $!{EINVAL}) {
            my($pack,$file,$line) = caller;
            croak "Your vendor has not defined DB macro $constname, used at $file line $line.
";
        }
        $AutoLoader::AUTOLOAD = $AUTOLOAD;
        goto &AutoLoader::AUTOLOAD;
    }

    eval "sub $AUTOLOAD { $val }";
    goto &$AUTOLOAD;
}
228
f6b705ef 229
eval {
    # Make all Fcntl O_XXX constants available for importing: import
    # them into this package first, then add them to our export list.
    require Fcntl;
    my @open_flags = grep { /^O_/ } @Fcntl::EXPORT;
    Fcntl->import(@open_flags);
    push @EXPORT, @open_flags;
};
f6b705ef 237
b90e71be
GS
# Load the XS half of the module with whichever loader the BEGIN block
# selected.
if ($use_XSLoader) {
    XSLoader::load("DB_File", $VERSION) ;
}
else {
    DB_File->bootstrap($VERSION) ;
}
8e07c86e
AD
242
243# Preloaded methods go here. Autoload methods go after __END__, and are
244# processed by the autosplit program.
245
# Common implementation behind TIEHASH and TIEARRAY.
#
# Receives the argument list tie() passed to the calling TIE* method:
# $arg[1] is used as the file name, $arg[3] as the file mode and $arg[4]
# as the *INFO object.  Which kind of tie is wanted is worked out from
# the name of the calling sub, so this must be called directly from
# TIEHASH/TIEARRAY.
#
# Returns whatever DoTie_() returns, or undef if the RECNO file that has
# to be pre-created (see below) cannot be created.
sub tie_hash_or_array
{
    my (@arg) = @_ ;
    my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;

    # The caller normally supplies a reference to a tied *INFO hash;
    # replace it with the object underlying that tie.
    $arg[4] = tied %{ $arg[4] }
        if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;

    # make recno in Berkeley DB version 2 work like recno in version 1.
    if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
        $arg[1] and ! -e $arg[1]) {
        # Three-argument open with a lexical handle: the file name is
        # taken literally (exactly as the -e test and chmod treat it)
        # and no package-wide FH handle is clobbered.
        open(my $fh, '>', $arg[1]) or return undef ;
        close $fh ;
        chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
    }

    DoTie_($tieHASH, @arg) ;
}
264
05475680
PM
# tie() entry points for hashes and arrays.  Both delegate to
# tie_hash_or_array(), which looks at the name of its caller to tell the
# two apart, so these must stay ordinary sub calls.
sub TIEHASH
{
    return tie_hash_or_array(@_) ;
}

sub TIEARRAY
{
    return tie_hash_or_array(@_) ;
}
88108326 274
045291aa
PM
# Empty the database: walk it once with seq() collecting every key, then
# delete the collected keys in reverse order.
sub CLEAR
{
    my $self = shift ;
    my ($key, $value) = (0, "") ;
    my @keys ;

    # seq() returns 0 for as long as there is a record to report.
    for (my $status = $self->seq($key, $value, R_FIRST()) ;
         $status == 0 ;
         $status = $self->seq($key, $value, R_NEXT())) {
        push @keys, $key ;
    }

    foreach my $doomed (reverse @keys) {
        $self->del($doomed) ;
    }
}
291
045291aa
PM
# Tied-array EXTEND hook: deliberately does nothing.
sub EXTEND
{
    return ;
}
293
# Tied-array STORESIZE hook: make the array $length elements long.
#   shrinking - delete records from the end, highest record number first
#   growing   - store an empty string in the new last slot
#   same size - nothing to do
sub STORESIZE
{
    my ($self, $length) = @_ ;
    my $current_length = $self->length() ;

    if ($length > $current_length) {
        $self->put($length - 1, "") ;
    }
    elsif ($length < $current_length) {
        my $recno = $current_length ;
        while (-- $recno >= $length) {
            $self->del($recno) ;
        }
    }
}
309
c5da4faf
PM
310
# Tied-array SPLICE, written to behave like Perl's built-in
# splice(ARRAY, OFFSET, LENGTH, LIST) on the records of the database.
#
#   $self   - the tied object; must provide FETCHSIZE, get, del, put
#             and R_IBEFORE
#   OFFSET  - first record affected; negative counts back from the end
#   LENGTH  - number of records to remove; negative leaves that many at
#             the end; if omitted, everything from OFFSET on is removed
#   LIST    - records inserted at OFFSET after the removal
#
# Returns the removed records in list context, the last removed record
# (or undef) in scalar context.  Dies if the database reports an error
# or if OFFSET points before the start of the array.
#
# An omitted LENGTH is now told apart from an explicit undef one.
# Previously an omitted LENGTH was defaulted to 0, which made the
# "remove everything from OFFSET onward" case unreachable.
sub SPLICE
{
    my $self = shift;
    my $offset = shift;
    if (not defined $offset) {
        carp 'Use of uninitialized value in splice';
        $offset = 0;
    }

    # Remember whether a LENGTH argument was passed at all, before it is
    # shifted off, so "omitted" can be distinguished from "undef".
    my $has_length = @_;
    my $length = $has_length ? shift : 0;
    # Carping about definedness comes _after_ the OFFSET sanity check.
    # This is so we get the same error messages as Perl's splice().
    #

    # NOTE(review): the insertion loop below stops at the first undef
    # element of LIST, so elements after an undef are not inserted.
    my @list = @_;

    my $size = $self->FETCHSIZE();

    # 'If OFFSET is negative then it start that far from the end of
    # the array.'
    #
    if ($offset < 0) {
        my $new_offset = $size + $offset;
        if ($new_offset < 0) {
            die "Modification of non-creatable array value attempted, "
              . "subscript $offset";
        }
        $offset = $new_offset;
    }

    if ($offset > $size) {
        $offset = $size;
    }

    if ($has_length and not defined $length) {
        carp 'Use of uninitialized value in splice';
        $length = 0;
    }

    # 'If LENGTH is omitted, removes everything from OFFSET onward.'
    if (not $has_length) {
        $length = $size - $offset;
    }

    # 'If LENGTH is negative, leave that many elements off the end of
    # the array.'
    #
    if ($length < 0) {
        $length = $size - $offset + $length;

        if ($length < 0) {
            # The user must have specified a length bigger than the
            # length of the array passed in.  But perl's splice()
            # doesn't catch this, it just behaves as for length=0.
            #
            $length = 0;
        }
    }

    if ($length > $size - $offset) {
        $length = $size - $offset;
    }

    # $num_elems holds the current number of elements in the database.
    my $num_elems = $size;

    # 'Removes the elements designated by OFFSET and LENGTH from an
    # array,'...
    #
    # Every removal is done at $offset: deleting a record moves the
    # following ones down, so the next victim is found at the same place.
    my @removed = ();
    foreach (0 .. $length - 1) {
        my $old;
        my $status = $self->get($offset, $old);
        if ($status != 0) {
            my $msg = "error from Berkeley DB on get($offset, \$old)";
            if ($status == 1) {
                $msg .= ' (no such element?)';
            }
            else {
                $msg .= ": error status $status";
                if (defined $! and $! ne '') {
                    $msg .= ", message $!";
                }
            }
            die $msg;
        }
        push @removed, $old;

        $status = $self->del($offset);
        if ($status != 0) {
            my $msg = "error from Berkeley DB on del($offset)";
            if ($status == 1) {
                $msg .= ' (no such element?)';
            }
            else {
                $msg .= ": error status $status";
                if (defined $! and $! ne '') {
                    $msg .= ", message $!";
                }
            }
            die $msg;
        }

        -- $num_elems;
    }

    # ...'and replaces them with the elements of LIST, if any.'
    my $pos = $offset;
    while (defined (my $elem = shift @list)) {
        my $old_pos = $pos;
        my $status;
        if ($pos >= $num_elems) {
            # Past the current end: a plain put appends.
            $status = $self->put($pos, $elem);
        }
        else {
            # Otherwise insert in front of the record currently at $pos.
            $status = $self->put($pos, $elem, $self->R_IBEFORE);
        }

        if ($status != 0) {
            my $msg = "error from Berkeley DB on put($pos, $elem, ...)";
            if ($status == 1) {
                $msg .= ' (no such element?)';
            }
            else {
                $msg .= ", error status $status";
                if (defined $! and $! ne '') {
                    $msg .= ", message $!";
                }
            }
            die $msg;
        }

        # put() is handed $pos itself; make sure it was not altered.
        die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE"
          if $old_pos != $pos;

        ++ $pos;
        ++ $num_elems;
    }

    if (wantarray) {
        # 'In list context, returns the elements removed from the
        # array.'
        #
        return @removed;
    }
    elsif (defined wantarray) {
        # 'In scalar context, returns the last element removed, or
        # undef if no elements are removed.'
        #
        if (@removed) {
            my $last = pop @removed;
            return "$last";
        }
        else {
            return undef;
        }
    }

    # Void context: nothing to return.
    return;
}
# Lower-case alias, so the operation is also available as $X->splice(...).
sub ::DB_File::splice { &SPLICE }
474
6ca2e664
PM
# Look for a specific key/value pair.
#
# Starts a cursor scan at the given key and steps through the records
# until one matches both key and value (returns 0) or seq() reports a
# non-zero status (returns that status).
sub find_dup
{
    croak "Usage: \$db->find_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $origkey, $value_wanted) = @_ ;
    my ($key, $value) = ($origkey, 0) ;

    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0) {
        return 0 if $key eq $origkey and $value eq $value_wanted ;
        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $status ;
}
494
# Delete a specific key/value pair.
#
# Uses find_dup() to locate the pair; if that fails its status is
# returned unchanged.  Otherwise the record is deleted with the R_CURSOR
# flag and the status of that del() is returned.
sub del_dup
{
    croak "Usage: \$db->del_dup(key,value)\n"
        unless @_ == 3 ;

    my ($db, $key, $value) = @_ ;

    my ($found) = $db->find_dup($key, $value) ;
    return $found unless $found == 0 ;

    return scalar $db->del($key, R_CURSOR()) ;
}
508
# Collect the values stored under one key.
#
#   scalar/void context     - returns the number of matching records
#   list context            - returns the matching values
#   list context, true flag - returns value => occurrence-count pairs
sub get_dup
{
    croak "Usage: \$db->get_dup(key [,flag])\n"
        unless @_ == 2 or @_ == 3 ;

    my ($db, $key, $flag) = @_ ;
    my $origkey   = $key ;
    my $value     = 0 ;
    my $list_ctx  = wantarray ;
    my %count_of  = () ;
    my @values    = () ;
    my $counter   = 0 ;

    # Walk the database from the cursor position for $key until seq()
    # reports a non-zero status (nothing more to read) or a different
    # key is encountered ($key ne $origkey).
    my $status = $db->seq($key, $value, R_CURSOR()) ;
    while ($status == 0 and $key eq $origkey) {

        # save the value or count number of matches
        if (! $list_ctx)
          { ++ $counter }
        elsif ($flag)
          { ++ $count_of{$value} }
        else
          { push (@values, $value) }

        $status = $db->seq($key, $value, R_NEXT()) ;
    }

    return $counter unless $list_ctx ;
    return $flag ? %count_of : @values ;
}
545
546
8e07c86e
AD
5471;
548__END__
549
3b35bae3
AD
550=head1 NAME
551
1f70e1ea 552DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3
AD
553
554=head1 SYNOPSIS
555
bbc7dcd2
MS
556 use DB_File;
557
88108326 558 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
559 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
560 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 561
3b35bae3
AD
562 $status = $X->del($key [, $flags]) ;
563 $status = $X->put($key, $value [, $flags]) ;
564 $status = $X->get($key, $value [, $flags]) ;
760ac839 565 $status = $X->seq($key, $value, $flags) ;
3b35bae3
AD
566 $status = $X->sync([$flags]) ;
567 $status = $X->fd ;
760ac839 568
f6b705ef 569 # BTREE only
88108326 570 $count = $X->get_dup($key) ;
571 @list = $X->get_dup($key) ;
572 %list = $X->get_dup($key, 1) ;
6ca2e664
PM
573 $status = $X->find_dup($key, $value) ;
574 $status = $X->del_dup($key, $value) ;
88108326 575
f6b705ef 576 # RECNO only
577 $a = $X->length;
578 $a = $X->pop ;
579 $X->push(list);
580 $a = $X->shift;
581 $X->unshift(list);
c5da4faf 582 @r = $X->splice(offset, length, elements);
f6b705ef 583
cad2e5aa
JH
584 # DBM Filters
585 $old_filter = $db->filter_store_key ( sub { ... } ) ;
586 $old_filter = $db->filter_store_value( sub { ... } ) ;
587 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
588 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
589
3b35bae3
AD
590 untie %hash ;
591 untie @array ;
592
593=head1 DESCRIPTION
594
8e07c86e 595B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 596facilities provided by Berkeley DB version 1.x (if you have a newer
039d031f
PM
597version of DB, see L<Using DB_File with Berkeley DB version 2 or 3>).
598It is assumed that you have a copy of the Berkeley DB manual pages at
599hand when reading this documentation. The interface defined here
600mirrors the Berkeley DB interface closely.
68dc0745 601
8e07c86e
AD
602Berkeley DB is a C library which provides a consistent interface to a
603number of database formats. B<DB_File> provides an interface to all
604three of the database types currently supported by Berkeley DB.
3b35bae3
AD
605
606The file types are:
607
608=over 5
609
88108326 610=item B<DB_HASH>
3b35bae3 611
88108326 612This database type allows arbitrary key/value pairs to be stored in data
8e07c86e
AD
613files. This is equivalent to the functionality provided by other
614hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
615the files created using DB_HASH are not compatible with any of the
616other packages mentioned.
3b35bae3 617
8e07c86e
AD
618A default hashing algorithm, which will be adequate for most
619applications, is built into Berkeley DB. If you do need to use your own
620hashing algorithm it is possible to write your own in Perl and have
621B<DB_File> use it instead.
3b35bae3 622
88108326 623=item B<DB_BTREE>
624
625The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 626sorted, balanced binary tree.
3b35bae3 627
8e07c86e
AD
628As with the DB_HASH format, it is possible to provide a user defined
629Perl routine to perform the comparison of keys. By default, though, the
630keys are stored in lexical order.
3b35bae3 631
88108326 632=item B<DB_RECNO>
3b35bae3 633
8e07c86e
AD
634DB_RECNO allows both fixed-length and variable-length flat text files
635to be manipulated using the same key/value pair interface as in DB_HASH
636and DB_BTREE. In this case the key will consist of a record (line)
637number.
3b35bae3
AD
638
639=back
640
039d031f 641=head2 Using DB_File with Berkeley DB version 2 or 3
1f70e1ea
PM
642
643Although B<DB_File> is intended to be used with Berkeley DB version 1,
c5da4faf 644it can also be used with version 2 or 3. In this case the interface is
1f70e1ea 645limited to the functionality provided by Berkeley DB 1.x. Anywhere the
039d031f
PM
646version 2 or 3 interface differs, B<DB_File> arranges for it to work
647like version 1. This feature allows B<DB_File> scripts that were built
648with version 1 to be migrated to version 2 or 3 without any changes.
1f70e1ea
PM
649
650If you want to make use of the new features available in Berkeley DB
b90e71be 6512.x or greater, use the Perl module B<BerkeleyDB> instead.
1f70e1ea 652
039d031f
PM
653B<Note:> The database file format has changed in both Berkeley DB
654version 2 and 3. If you cannot recreate your databases, you must dump
c5da4faf
PM
655any existing databases with either the C<db_dump> or the C<db_dump185>
656utility that comes with Berkeley DB.
039d031f 657Once you have rebuilt DB_File to use Berkeley DB version 2 or 3, your
1f70e1ea
PM
658databases can be recreated using C<db_load>. Refer to the Berkeley DB
659documentation for further details.
660
039d031f
PM
661Please read L<"COPYRIGHT"> before using version 2.x or 3.x of Berkeley
662DB with DB_File.
1f70e1ea 663
68dc0745 664=head2 Interface to Berkeley DB
3b35bae3
AD
665
666B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e
AD
667in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
668allows B<DB_File> to access Berkeley DB files using either an
669associative array (for DB_HASH & DB_BTREE file types) or an ordinary
670array (for the DB_RECNO file type).
3b35bae3 671
88108326 672In addition to the tie() interface, it is also possible to access most
673of the functions provided in the Berkeley DB API directly.
f6b705ef 674See L<THE API INTERFACE>.
3b35bae3 675
88108326 676=head2 Opening a Berkeley DB Database File
3b35bae3 677
8e07c86e 678Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 679Here is the C prototype for dbopen():
3b35bae3
AD
680
681 DB*
682 dbopen (const char * file, int flags, int mode,
683 DBTYPE type, const void * openinfo)
684
685The parameter C<type> is an enumeration which specifies which of the 3
686interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
687Depending on which of these is actually chosen, the final parameter,
688I<openinfo> points to a data structure which allows tailoring of the
689specific interface method.
690
8e07c86e 691This interface is handled slightly differently in B<DB_File>. Here is
88108326 692an equivalent call using B<DB_File>:
3b35bae3 693
88108326 694 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 695
8e07c86e
AD
696The C<filename>, C<flags> and C<mode> parameters are the direct
697equivalent of their dbopen() counterparts. The final parameter $DB_HASH
698performs the function of both the C<type> and C<openinfo> parameters in
699dbopen().
3b35bae3 700
88108326 701In the example above $DB_HASH is actually a pre-defined reference to a
702hash object. B<DB_File> has three of these pre-defined references.
703Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 704
8e07c86e
AD
The keys allowed in each of these pre-defined references are limited to
706the names used in the equivalent C structure. So, for example, the
707$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 708C<ffactor>, C<hash>, C<lorder> and C<nelem>.
709
710To change one of these elements, just assign to it like this:
711
712 $DB_HASH->{'cachesize'} = 10000 ;
713
714The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
715usually adequate for most applications. If you do need to create extra
716instances of these objects, constructors are available for each file
717type.
718
719Here are examples of the constructors and the valid options available
720for DB_HASH, DB_BTREE and DB_RECNO respectively.
721
722 $a = new DB_File::HASHINFO ;
723 $a->{'bsize'} ;
724 $a->{'cachesize'} ;
725 $a->{'ffactor'};
726 $a->{'hash'} ;
727 $a->{'lorder'} ;
728 $a->{'nelem'} ;
729
730 $b = new DB_File::BTREEINFO ;
731 $b->{'flags'} ;
732 $b->{'cachesize'} ;
733 $b->{'maxkeypage'} ;
734 $b->{'minkeypage'} ;
735 $b->{'psize'} ;
736 $b->{'compare'} ;
737 $b->{'prefix'} ;
738 $b->{'lorder'} ;
739
740 $c = new DB_File::RECNOINFO ;
741 $c->{'bval'} ;
742 $c->{'cachesize'} ;
743 $c->{'psize'} ;
744 $c->{'flags'} ;
745 $c->{'lorder'} ;
746 $c->{'reclen'} ;
747 $c->{'bfname'} ;
748
The values stored in the hashes above are mostly the direct equivalent
of their C counterparts. Like their C counterparts, all are set to
default values - that means you don't have to set I<all> of the
88108326 752values when you only want to change one. Here is an example:
753
754 $a = new DB_File::HASHINFO ;
755 $a->{'cachesize'} = 12345 ;
756 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
757
36477c24 758A few of the options need extra discussion here. When used, the C
88108326 759equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
760to C functions. In B<DB_File> these keys are used to store references
761to Perl subs. Below are templates for each of the subs:
762
763 sub hash
764 {
765 my ($data) = @_ ;
766 ...
767 # return the hash value for $data
768 return $hash ;
769 }
3b35bae3 770
88108326 771 sub compare
772 {
        my ($key1, $key2) = @_ ;
774 ...
775 # return 0 if $key1 eq $key2
776 # -1 if $key1 lt $key2
777 # 1 if $key1 gt $key2
778 return (-1 , 0 or 1) ;
779 }
3b35bae3 780
88108326 781 sub prefix
782 {
        my ($key1, $key2) = @_ ;
784 ...
785 # return number of bytes of $key2 which are
786 # necessary to determine that it is greater than $key1
787 return $bytes ;
788 }
3b35bae3 789
f6b705ef 790See L<Changing the BTREE sort order> for an example of using the
791C<compare> template.
88108326 792
36477c24 793If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 794C<bval>, you should check out L<The 'bval' Option>.
36477c24 795
88108326 796=head2 Default Parameters
797
798It is possible to omit some or all of the final 4 parameters in the
799call to C<tie> and let them take default values. As DB_HASH is the most
800common file format used, the call:
801
802 tie %A, "DB_File", "filename" ;
803
804is equivalent to:
805
18d2dc8c 806 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 807
808It is also possible to omit the filename parameter as well, so the
809call:
810
811 tie %A, "DB_File" ;
812
813is equivalent to:
814
18d2dc8c 815 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 816
f6b705ef 817See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 818in place of a filename.
819
f6b705ef 820=head2 In Memory Databases
821
822Berkeley DB allows the creation of in-memory databases by using NULL
823(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
824uses C<undef> instead of NULL to provide this functionality.
825
826=head1 DB_HASH
827
828The DB_HASH file format is probably the most commonly used of the three
829file formats that B<DB_File> supports. It is also very straightforward
830to use.
831
68dc0745 832=head2 A Simple Example
f6b705ef 833
834This example shows how to create a database, add key/value pairs to the
database, delete key/value pairs and finally how to enumerate the
836contents of the database.
837
3245f058 838 use warnings ;
610ab055 839 use strict ;
f6b705ef 840 use DB_File ;
610ab055 841 use vars qw( %h $k $v ) ;
f6b705ef 842
2c2d71f5 843 unlink "fruit" ;
f6b705ef 844 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH
845 or die "Cannot open file 'fruit': $!\n";
846
847 # Add a few key/value pairs to the file
848 $h{"apple"} = "red" ;
849 $h{"orange"} = "orange" ;
850 $h{"banana"} = "yellow" ;
851 $h{"tomato"} = "red" ;
852
853 # Check for existence of a key
854 print "Banana Exists\n\n" if $h{"banana"} ;
855
856 # Delete a key/value pair.
857 delete $h{"apple"} ;
858
859 # print the contents of the file
860 while (($k, $v) = each %h)
861 { print "$k -> $v\n" }
862
863 untie %h ;
864
Here is the output:
866
867 Banana Exists
bbc7dcd2 868
f6b705ef 869 orange -> orange
870 tomato -> red
871 banana -> yellow
872
Note that, like ordinary associative arrays, the keys are retrieved in
an apparently random order.
875
876=head1 DB_BTREE
877
878The DB_BTREE format is useful when you want to store data in a given
879order. By default the keys will be stored in lexical order, but as you
880will see from the example shown in the next section, it is very easy to
881define your own sorting function.
882
883=head2 Changing the BTREE sort order
884
885This script shows how to override the default sorting algorithm that
886BTREE uses. Instead of using the normal lexical ordering, a case
887insensitive compare function will be used.
88108326 888
3245f058 889 use warnings ;
610ab055 890 use strict ;
f6b705ef 891 use DB_File ;
610ab055
PM
892
893 my %h ;
f6b705ef 894
895 sub Compare
896 {
897 my ($key1, $key2) = @_ ;
898 "\L$key1" cmp "\L$key2" ;
899 }
900
901 # specify the Perl sub that will do the comparison
902 $DB_BTREE->{'compare'} = \&Compare ;
903
2c2d71f5 904 unlink "tree" ;
f6b705ef 905 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE
906 or die "Cannot open file 'tree': $!\n" ;
907
908 # Add a key/value pair to the file
909 $h{'Wall'} = 'Larry' ;
910 $h{'Smith'} = 'John' ;
911 $h{'mouse'} = 'mickey' ;
912 $h{'duck'} = 'donald' ;
913
914 # Delete
915 delete $h{"duck"} ;
916
917 # Cycle through the keys printing them in order.
918 # Note it is not necessary to sort the keys as
919 # the btree will have kept them in order automatically.
920 foreach (keys %h)
921 { print "$_\n" }
922
923 untie %h ;
924
925Here is the output from the code above.
926
927 mouse
928 Smith
929 Wall
930
There are a few points to bear in mind if you want to change the
932ordering in a BTREE database:
933
934=over 5
935
936=item 1.
937
938The new compare function must be specified when you create the database.
939
940=item 2.
941
942You cannot change the ordering once the database has been created. Thus
943you must use the same compare function every time you access the
88108326 944database.
945
f6b705ef 946=back
947
68dc0745 948=head2 Handling Duplicate Keys
f6b705ef 949
950The BTREE file type optionally allows a single key to be associated
951with an arbitrary number of values. This option is enabled by setting
952the flags element of C<$DB_BTREE> to R_DUP when creating the database.
953
88108326 954There are some difficulties in using the tied hash interface if you
955want to manipulate a BTREE database with duplicate keys. Consider this
956code:
957
3245f058 958 use warnings ;
610ab055 959 use strict ;
88108326 960 use DB_File ;
610ab055
PM
961
962 use vars qw($filename %h ) ;
963
88108326 964 $filename = "tree" ;
965 unlink $filename ;
bbc7dcd2 966
88108326 967 # Enable duplicate records
968 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 969
88108326 970 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
971 or die "Cannot open $filename: $!\n";
bbc7dcd2 972
88108326 973 # Add some key/value pairs to the file
974 $h{'Wall'} = 'Larry' ;
975 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 976 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 977 $h{'Smith'} = 'John' ;
978 $h{'mouse'} = 'mickey' ;
979
980 # iterate through the associative array
981 # and print each key/value pair.
2c2d71f5 982 foreach (sort keys %h)
88108326 983 { print "$_ -> $h{$_}\n" }
984
f6b705ef 985 untie %h ;
986
88108326 987Here is the output:
988
989 Smith -> John
990 Wall -> Larry
991 Wall -> Larry
f6b705ef 992 Wall -> Larry
88108326 993 mouse -> mickey
994
f6b705ef 995As you can see 3 records have been successfully created with key C<Wall>
88108326 996- the only thing is, when they are retrieved from the database they
f6b705ef 997I<seem> to have the same value, namely C<Larry>. The problem is caused
998by the way that the associative array interface works. Basically, when
999the associative array interface is used to fetch the value associated
1000with a given key, it will only ever retrieve the first value.
88108326 1001
1002Although it may not be immediately obvious from the code above, the
1003associative array interface can be used to write values with duplicate
1004keys, but it cannot be used to read them back from the database.
1005
1006The way to get around this problem is to use the Berkeley DB API method
1007called C<seq>. This method allows sequential access to key/value
f6b705ef 1008pairs. See L<THE API INTERFACE> for details of both the C<seq> method
1009and the API in general.
88108326 1010
1011Here is the script above rewritten using the C<seq> API method.
1012
3245f058 1013 use warnings ;
610ab055 1014 use strict ;
88108326 1015 use DB_File ;
bbc7dcd2 1016
610ab055
PM
1017 use vars qw($filename $x %h $status $key $value) ;
1018
88108326 1019 $filename = "tree" ;
1020 unlink $filename ;
bbc7dcd2 1021
88108326 1022 # Enable duplicate records
1023 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1024
88108326 1025 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1026 or die "Cannot open $filename: $!\n";
bbc7dcd2 1027
88108326 1028 # Add some key/value pairs to the file
1029 $h{'Wall'} = 'Larry' ;
1030 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1031 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1032 $h{'Smith'} = 'John' ;
1033 $h{'mouse'} = 'mickey' ;
bbc7dcd2 1034
f6b705ef 1035 # iterate through the btree using seq
88108326 1036 # and print each key/value pair.
610ab055 1037 $key = $value = 0 ;
f6b705ef 1038 for ($status = $x->seq($key, $value, R_FIRST) ;
1039 $status == 0 ;
1040 $status = $x->seq($key, $value, R_NEXT) )
88108326 1041 { print "$key -> $value\n" }
bbc7dcd2 1042
88108326 1043 undef $x ;
1044 untie %h ;
1045
1046that prints:
1047
1048 Smith -> John
1049 Wall -> Brick
f6b705ef 1050 Wall -> Brick
88108326 1051 Wall -> Larry
1052 mouse -> mickey
1053
f6b705ef 1054This time we have got all the key/value pairs, including the multiple
88108326 1055values associated with the key C<Wall>.
1056
6ca2e664
PM
1057To make life easier when dealing with duplicate keys, B<DB_File> comes with
1058a few utility methods.
1059
68dc0745 1060=head2 The get_dup() Method
f6b705ef 1061
6ca2e664 1062The C<get_dup> method assists in
88108326 1063reading duplicate values from BTREE databases. The method can take the
1064following forms:
1065
1066 $count = $x->get_dup($key) ;
1067 @list = $x->get_dup($key) ;
1068 %list = $x->get_dup($key, 1) ;
1069
1070In a scalar context the method returns the number of values associated
1071with the key, C<$key>.
1072
1073In list context, it returns all the values which match C<$key>. Note
f6b705ef 1074that the values will be returned in an apparently random order.
88108326 1075
7a2e2cd6 1076In list context, if the second parameter is present and evaluates
1077TRUE, the method returns an associative array. The keys of the
1078associative array correspond to the values that matched in the BTREE
1079and the values of the array are a count of the number of times that
1080particular value occurred in the BTREE.
88108326 1081
f6b705ef 1082So assuming the database created above, we can use C<get_dup> like
88108326 1083this:
1084
3245f058 1085 use warnings ;
2c2d71f5
JH
1086 use strict ;
1087 use DB_File ;
bbc7dcd2 1088
2c2d71f5
JH
1089 use vars qw($filename $x %h ) ;
1090
1091 $filename = "tree" ;
bbc7dcd2 1092
2c2d71f5
JH
1093 # Enable duplicate records
1094 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1095
2c2d71f5
JH
1096 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1097 or die "Cannot open $filename: $!\n";
1098
610ab055 1099 my $cnt = $x->get_dup("Wall") ;
88108326 1100 print "Wall occurred $cnt times\n" ;
1101
610ab055 1102 my %hash = $x->get_dup("Wall", 1) ;
88108326 1103 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 1104 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 1105
2c2d71f5 1106 my @list = sort $x->get_dup("Wall") ;
88108326 1107 print "Wall => [@list]\n" ;
1108
f6b705ef 1109 @list = $x->get_dup("Smith") ;
88108326 1110 print "Smith => [@list]\n" ;
bbc7dcd2 1111
f6b705ef 1112 @list = $x->get_dup("Dog") ;
88108326 1113 print "Dog => [@list]\n" ;
1114
1115
1116and it will print:
1117
f6b705ef 1118 Wall occurred 3 times
88108326 1119 Larry is there
f6b705ef 1120 There are 2 Brick Walls
1121 Wall => [Brick Brick Larry]
88108326 1122 Smith => [John]
1123 Dog => []
3b35bae3 1124
6ca2e664
PM
1125=head2 The find_dup() Method
1126
1127 $status = $X->find_dup($key, $value) ;
1128
b90e71be 1129This method checks for the existence of a specific key/value pair. If the
6ca2e664
PM
1130pair exists, the cursor is left pointing to the pair and the method
1131returns 0. Otherwise the method returns a non-zero value.
1132
1133Assuming the database from the previous example:
1134
3245f058 1135 use warnings ;
6ca2e664
PM
1136 use strict ;
1137 use DB_File ;
bbc7dcd2 1138
6ca2e664
PM
1139 use vars qw($filename $x %h $found) ;
1140
1141 my $filename = "tree" ;
bbc7dcd2 1142
6ca2e664
PM
1143 # Enable duplicate records
1144 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1145
6ca2e664
PM
1146 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1147 or die "Cannot open $filename: $!\n";
1148
1149 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1150 print "Larry Wall is $found there\n" ;
bbc7dcd2 1151
6ca2e664
PM
1152 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
1153 print "Harry Wall is $found there\n" ;
bbc7dcd2 1154
6ca2e664
PM
1155 undef $x ;
1156 untie %h ;
1157
1158prints this
1159
2c2d71f5 1160 Larry Wall is there
6ca2e664
PM
1161 Harry Wall is not there
1162
1163
1164=head2 The del_dup() Method
1165
1166 $status = $X->del_dup($key, $value) ;
1167
1168This method deletes a specific key/value pair. It returns
11690 if they exist and have been deleted successfully.
1170Otherwise the method returns a non-zero value.
1171
b90e71be 1172Again assuming the existence of the C<tree> database
6ca2e664 1173
3245f058 1174 use warnings ;
6ca2e664
PM
1175 use strict ;
1176 use DB_File ;
bbc7dcd2 1177
6ca2e664
PM
1178 use vars qw($filename $x %h $found) ;
1179
1180 my $filename = "tree" ;
bbc7dcd2 1181
6ca2e664
PM
1182 # Enable duplicate records
1183 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1184
6ca2e664
PM
1185 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1186 or die "Cannot open $filename: $!\n";
1187
1188 $x->del_dup("Wall", "Larry") ;
1189
1190 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1191 print "Larry Wall is $found there\n" ;
bbc7dcd2 1192
6ca2e664
PM
1193 undef $x ;
1194 untie %h ;
1195
1196prints this
1197
1198 Larry Wall is not there
1199
f6b705ef 1200=head2 Matching Partial Keys
1201
1202The BTREE interface has a feature which allows partial keys to be
1203matched. This functionality is I<only> available when the C<seq> method
1204is used along with the R_CURSOR flag.
1205
1206 $x->seq($key, $value, R_CURSOR) ;
1207
1208Here is the relevant quote from the dbopen man page where it defines
1209the use of the R_CURSOR flag with seq:
1210
f6b705ef 1211 Note, for the DB_BTREE access method, the returned key is not
1212 necessarily an exact match for the specified key. The returned key
1213 is the smallest key greater than or equal to the specified key,
1214 permitting partial key matches and range searches.
1215
f6b705ef 1216In the example script below, the C<match> sub uses this feature to find
1217and print the first matching key/value pair given a partial key.
1218
3245f058 1219 use warnings ;
610ab055 1220 use strict ;
f6b705ef 1221 use DB_File ;
1222 use Fcntl ;
610ab055
PM
1223
1224 use vars qw($filename $x %h $st $key $value) ;
f6b705ef 1225
1226 sub match
1227 {
1228 my $key = shift ;
610ab055 1229 my $value = 0;
f6b705ef 1230 my $orig_key = $key ;
1231 $x->seq($key, $value, R_CURSOR) ;
1232 print "$orig_key\t-> $key\t-> $value\n" ;
1233 }
1234
1235 $filename = "tree" ;
1236 unlink $filename ;
1237
1238 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE
1239 or die "Cannot open $filename: $!\n";
bbc7dcd2 1240
f6b705ef 1241 # Add some key/value pairs to the file
1242 $h{'mouse'} = 'mickey' ;
1243 $h{'Wall'} = 'Larry' ;
1244 $h{'Walls'} = 'Brick' ;
1245 $h{'Smith'} = 'John' ;
bbc7dcd2 1246
f6b705ef 1247
610ab055 1248 $key = $value = 0 ;
f6b705ef 1249 print "IN ORDER\n" ;
1250 for ($st = $x->seq($key, $value, R_FIRST) ;
1251 $st == 0 ;
1252 $st = $x->seq($key, $value, R_NEXT) )
bbc7dcd2 1253
2c2d71f5 1254 { print "$key -> $value\n" }
bbc7dcd2 1255
f6b705ef 1256 print "\nPARTIAL MATCH\n" ;
1257
1258 match "Wa" ;
1259 match "A" ;
1260 match "a" ;
1261
1262 undef $x ;
1263 untie %h ;
1264
1265Here is the output:
1266
1267 IN ORDER
1268 Smith -> John
1269 Wall -> Larry
1270 Walls -> Brick
1271 mouse -> mickey
1272
1273 PARTIAL MATCH
1274 Wa -> Wall -> Larry
1275 A -> Smith -> John
1276 a -> mouse -> mickey
1277
1278=head1 DB_RECNO
1279
1280DB_RECNO provides an interface to flat text files. Both variable and
1281fixed length records are supported.
3b35bae3 1282
6ca2e664 1283In order to make RECNO more compatible with Perl, the array offset for
88108326 1284all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1285
88108326 1286As with normal Perl arrays, a RECNO array can be accessed using
1287negative indexes. The index -1 refers to the last element of the array,
1288-2 the second last, and so on. Attempting to access an element before
1289the start of the array will raise a fatal run-time error.
3b35bae3 1290
68dc0745 1291=head2 The 'bval' Option
36477c24 1292
1293The operation of the bval option warrants some discussion. Here is the
1294definition of bval from the Berkeley DB 1.85 recno manual page:
1295
1296 The delimiting byte to be used to mark the end of a
1297 record for variable-length records, and the pad charac-
1298 ter for fixed-length records. If no value is speci-
1299 fied, newlines (``\n'') are used to mark the end of
1300 variable-length records and fixed-length records are
1301 padded with spaces.
1302
1303The second sentence is wrong. In actual fact bval will only default to
1304C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1305openinfo parameter is used at all, the value that happens to be in bval
1306will be used. That means you always have to specify bval when making
1307use of any of the options in the openinfo parameter. This documentation
1308error will be fixed in the next release of Berkeley DB.
1309
1310That clarifies the situation with regard to Berkeley DB itself. What
1311about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1312quite useful, so B<DB_File> conforms to it.
36477c24 1313
1314That means that you can specify other options (e.g. cachesize) and
1315still have bval default to C<"\n"> for variable length records, and
1316space for fixed length records.
1317
c5da4faf
PM
1318Also note that the bval option only allows you to specify a single byte
1319as a delimiter.
1320
f6b705ef 1321=head2 A Simple Example
3b35bae3 1322
6ca2e664
PM
1323Here is a simple example that uses RECNO (if you are using a version
1324of Perl earlier than 5.004_57 this example won't work -- see
1325L<Extra RECNO Methods> for a workaround).
f6b705ef 1326
3245f058 1327 use warnings ;
610ab055 1328 use strict ;
f6b705ef 1329 use DB_File ;
f6b705ef 1330
2c2d71f5
JH
1331 my $filename = "text" ;
1332 unlink $filename ;
1333
610ab055 1334 my @h ;
2c2d71f5 1335 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_RECNO
f6b705ef 1336 or die "Cannot open file 'text': $!\n" ;
1337
1338 # Add a few key/value pairs to the file
1339 $h[0] = "orange" ;
1340 $h[1] = "blue" ;
1341 $h[2] = "yellow" ;
1342
6ca2e664
PM
1343 push @h, "green", "black" ;
1344
1345 my $elements = scalar @h ;
1346 print "The array contains $elements entries\n" ;
1347
1348 my $last = pop @h ;
1349 print "popped $last\n" ;
1350
1351 unshift @h, "white" ;
1352 my $first = shift @h ;
1353 print "shifted $first\n" ;
1354
f6b705ef 1355 # Check for existence of a key
1356 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1357
1358 # use a negative index
1359 print "The last element is $h[-1]\n" ;
1360 print "The 2nd last element is $h[-2]\n" ;
1361
1362 untie @h ;
3b35bae3 1363
f6b705ef 1364Here is the output from the script:
1365
6ca2e664
PM
1366 The array contains 5 entries
1367 popped black
2c2d71f5 1368 shifted white
f6b705ef 1369 Element 1 Exists with value blue
6ca2e664
PM
1370 The last element is green
1371 The 2nd last element is yellow
f6b705ef 1372
6ca2e664 1373=head2 Extra RECNO Methods
f6b705ef 1374
045291aa 1375If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664
PM
1376array interface is quite limited. In the example script above
1377C<push>, C<pop>, C<shift>, C<unshift>
1378or determining the array length will not work with a tied array.
045291aa
PM
1379
1380To make the interface more useful for older versions of Perl, a number
1381of methods are supplied with B<DB_File> to simulate the missing array
1382operations. All these methods are accessed via the object returned from
1383the tie call.
f6b705ef 1384
1385Here are the methods:
1386
1387=over 5
3b35bae3 1388
f6b705ef 1389=item B<$X-E<gt>push(list) ;>
1390
1391Pushes the elements of C<list> to the end of the array.
1392
1393=item B<$value = $X-E<gt>pop ;>
1394
1395Removes and returns the last element of the array.
1396
1397=item B<$X-E<gt>shift>
1398
1399Removes and returns the first element of the array.
1400
1401=item B<$X-E<gt>unshift(list) ;>
1402
1403Pushes the elements of C<list> to the start of the array.
1404
1405=item B<$X-E<gt>length>
1406
1407Returns the number of elements in the array.
1408
c5da4faf
PM
1409=item B<$X-E<gt>splice(offset, length, elements);>
1410
1411Returns a splice of the array.
1412
f6b705ef 1413=back
1414
1415=head2 Another Example
1416
1417Here is a more complete example that makes use of some of the methods
1418described above. It also makes use of the API interface directly (see
1419L<THE API INTERFACE>).
1420
3245f058 1421 use warnings ;
f6b705ef 1422 use strict ;
1423 use vars qw(@h $H $file $i) ;
1424 use DB_File ;
1425 use Fcntl ;
bbc7dcd2 1426
f6b705ef 1427 $file = "text" ;
1428
1429 unlink $file ;
1430
1431 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO
1432 or die "Cannot open file $file: $!\n" ;
bbc7dcd2 1433
f6b705ef 1434 # first create a text file to play with
1435 $h[0] = "zero" ;
1436 $h[1] = "one" ;
1437 $h[2] = "two" ;
1438 $h[3] = "three" ;
1439 $h[4] = "four" ;
1440
bbc7dcd2 1441
f6b705ef 1442 # Print the records in order.
1443 #
1444 # The length method is needed here because evaluating a tied
1445 # array in a scalar context does not return the number of
1446 # elements in the array.
1447
1448 print "\nORIGINAL\n" ;
1449 foreach $i (0 .. $H->length - 1) {
1450 print "$i: $h[$i]\n" ;
1451 }
1452
1453 # use the push & pop methods
1454 $a = $H->pop ;
1455 $H->push("last") ;
1456 print "\nThe last record was [$a]\n" ;
1457
1458 # and the shift & unshift methods
1459 $a = $H->shift ;
1460 $H->unshift("first") ;
1461 print "The first record was [$a]\n" ;
1462
1463 # Use the API to add a new record after record 2.
1464 $i = 2 ;
1465 $H->put($i, "Newbie", R_IAFTER) ;
1466
1467 # and a new record before record 1.
1468 $i = 1 ;
1469 $H->put($i, "New One", R_IBEFORE) ;
1470
1471 # delete record 3
1472 $H->del(3) ;
1473
1474 # now print the records in reverse order
1475 print "\nREVERSE\n" ;
1476 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1477 { print "$i: $h[$i]\n" }
1478
1479 # same again, but use the API functions instead
1480 print "\nREVERSE again\n" ;
610ab055 1481 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1482 for ($s = $H->seq($k, $v, R_LAST) ;
1483 $s == 0 ;
1484 $s = $H->seq($k, $v, R_PREV))
1485 { print "$k: $v\n" }
1486
1487 undef $H ;
1488 untie @h ;
1489
1490and this is what it outputs:
1491
1492 ORIGINAL
1493 0: zero
1494 1: one
1495 2: two
1496 3: three
1497 4: four
1498
1499 The last record was [four]
1500 The first record was [zero]
1501
1502 REVERSE
1503 5: last
1504 4: three
1505 3: Newbie
1506 2: one
1507 1: New One
1508 0: first
1509
1510 REVERSE again
1511 5: last
1512 4: three
1513 3: Newbie
1514 2: one
1515 1: New One
1516 0: first
1517
1518Notes:
1519
1520=over 5
1521
1522=item 1.
1523
1524Rather than iterating through the array, C<@h> like this:
1525
1526 foreach $i (@h)
1527
1528it is necessary to use either this:
1529
1530 foreach $i (0 .. $H->length - 1)
1531
1532or this:
1533
1534 for ($a = $H->seq($k, $v, R_FIRST) ;
1535 $a == 0 ;
1536 $a = $H->seq($k, $v, R_NEXT) )
1537
1538=item 2.
1539
1540Notice that both times the C<put> method was used the record index was
1541specified using a variable, C<$i>, rather than the literal value
1542itself. This is because C<put> will return the record number of the
1543inserted line via that parameter.
1544
1545=back
1546
1547=head1 THE API INTERFACE
3b35bae3
AD
1548
1549As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1550possible to make direct use of most of the API functions defined in the
8e07c86e 1551Berkeley DB documentation.
3b35bae3 1552
88108326 1553To do this you need to store a copy of the object returned from the tie.
3b35bae3 1554
88108326 1555 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1556
8e07c86e 1557Once you have done that, you can access the Berkeley DB API functions
88108326 1558as B<DB_File> methods directly like this:
3b35bae3
AD
1559
1560 $db->put($key, $value, R_NOOVERWRITE) ;
1561
88108326 1562B<Important:> If you have saved a copy of the object returned from
1563C<tie>, the underlying database file will I<not> be closed until both
1564the tied variable is untied and all copies of the saved object are
610ab055 1565destroyed.
88108326 1566
1567 use DB_File ;
1568 $db = tie %hash, "DB_File", "filename"
1569 or die "Cannot tie filename: $!" ;
1570 ...
1571 undef $db ;
1572 untie %hash ;
1573
9a2c4ce3 1574See L<The untie() Gotcha> for more details.
778183f3 1575
88108326 1576All the functions defined in L<dbopen> are available except for
1577close() and dbopen() itself. The B<DB_File> method interface to the
1578supported functions has been implemented to mirror the way Berkeley DB
1579works whenever possible. In particular note that:
1580
1581=over 5
1582
1583=item *
1584
1585The methods return a status value. All return 0 on success.
1586All return -1 to signify an error and set C<$!> to the exact
1587error code. The return code 1 generally (but not always) means that the
1588key specified did not exist in the database.
1589
1590Other return codes are defined. See below and in the Berkeley DB
1591documentation for details. The Berkeley DB documentation should be used
1592as the definitive source.
1593
1594=item *
3b35bae3 1595
88108326 1596Whenever a Berkeley DB function returns data via one of its parameters,
1597the equivalent B<DB_File> method does exactly the same.
3b35bae3 1598
88108326 1599=item *
1600
1601If you are careful, it is possible to mix API calls with the tied
1602hash/array interface in the same piece of code. Although only a few of
1603the methods used to implement the tied interface currently make use of
1604the cursor, you should always assume that the cursor has been changed
1605any time the tied hash/array interface is used. As an example, this
1606code will probably not do what you expect:
1607
1608 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1609 or die "Cannot tie $filename: $!" ;
1610
1611 # Get the first key/value pair and set the cursor
1612 $X->seq($key, $value, R_FIRST) ;
1613
1614 # this line will modify the cursor
1615 $count = scalar keys %x ;
1616
1617 # Get the second key/value pair.
1618 # oops, it didn't, it got the last key/value pair!
1619 $X->seq($key, $value, R_NEXT) ;
1620
1621The code above can be rearranged to get around the problem, like this:
1622
1623 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1624 or die "Cannot tie $filename: $!" ;
1625
1626 # this line will modify the cursor
1627 $count = scalar keys %x ;
1628
1629 # Get the first key/value pair and set the cursor
1630 $X->seq($key, $value, R_FIRST) ;
1631
1632 # Get the second key/value pair.
1633 # worked this time.
1634 $X->seq($key, $value, R_NEXT) ;
1635
1636=back
1637
1638All the constants defined in L<dbopen> for use in the flags parameters
1639in the methods defined below are also available. Refer to the Berkeley
1640DB documentation for the precise meaning of the flags values.
1641
1642Below is a list of the methods available.
3b35bae3
AD
1643
1644=over 5
1645
f6b705ef 1646=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1647
1648Given a key (C<$key>) this method reads the value associated with it
1649from the database. The value read from the database is returned in the
1650C<$value> parameter.
3b35bae3 1651
88108326 1652If the key does not exist the method returns 1.
3b35bae3 1653
88108326 1654No flags are currently defined for this method.
3b35bae3 1655
f6b705ef 1656=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1657
88108326 1658Stores the key/value pair in the database.
1659
1660If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1661will have the record number of the inserted key/value pair set.
3b35bae3 1662
88108326 1663Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1664R_SETCURSOR.
1665
f6b705ef 1666=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1667
88108326 1668Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1669
88108326 1670A return code of 1 means that the requested key was not in the
1671database.
3b35bae3 1672
88108326 1673R_CURSOR is the only valid flag at present.
3b35bae3 1674
f6b705ef 1675=item B<$status = $X-E<gt>fd ;>
3b35bae3 1676
88108326 1677Returns the file descriptor for the underlying database.
3b35bae3 1678
b90e71be
GS
1679See L<Locking: The Trouble with fd> for an explanation for why you should
1680not use C<fd> to lock your database.
3b35bae3 1681
f6b705ef 1682=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1683
88108326 1684This interface allows sequential retrieval from the database. See
1685L<dbopen> for full details.
1686
1687Both the C<$key> and C<$value> parameters will be set to the key/value
1688pair read from the database.
1689
1690The flags parameter is mandatory. The valid flag values are R_CURSOR,
1691R_FIRST, R_LAST, R_NEXT and R_PREV.
1692
f6b705ef 1693=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1694
1695Flushes any cached buffers to disk.
1696
1697R_RECNOSYNC is the only valid flag at present.
3b35bae3
AD
1698
1699=back
1700
cad2e5aa
JH
1701=head1 DBM FILTERS
1702
1703A DBM Filter is a piece of code that is used when you I<always>
1704want to make the same transformation to all keys and/or values in a
1705DBM database.
1706
1707There are four methods associated with DBM Filters. All work identically,
1708and each is used to install (or uninstall) a single DBM Filter. Each
1709expects a single parameter, namely a reference to a sub. The only
1710difference between them is the place that the filter is installed.
1711
1712To summarise:
1713
1714=over 5
1715
1716=item B<filter_store_key>
1717
1718If a filter has been installed with this method, it will be invoked
1719every time you write a key to a DBM database.
1720
1721=item B<filter_store_value>
1722
1723If a filter has been installed with this method, it will be invoked
1724every time you write a value to a DBM database.
1725
1726
1727=item B<filter_fetch_key>
1728
1729If a filter has been installed with this method, it will be invoked
1730every time you read a key from a DBM database.
1731
1732=item B<filter_fetch_value>
1733
1734If a filter has been installed with this method, it will be invoked
1735every time you read a value from a DBM database.
1736
1737=back
1738
1739You can use any combination of the methods, from none, to all four.
1740
1741All filter methods return the existing filter, if present, or C<undef>
1742if not.
1743
1744To delete a filter pass C<undef> to it.
1745
1746=head2 The Filter
1747
1748When each filter is called by Perl, a local copy of C<$_> will contain
1749the key or value to be filtered. Filtering is achieved by modifying
1750the contents of C<$_>. The return code from the filter is ignored.
1751
1752=head2 An Example -- the NULL termination problem.
1753
1754Consider the following scenario. You have a DBM database
1755that you need to share with a third-party C application. The C application
1756assumes that I<all> keys and values are NULL terminated. Unfortunately
1757when Perl writes to DBM databases it doesn't use NULL termination, so
1758your Perl application will have to manage NULL termination itself. When
1759you write to the database you will have to use something like this:
1760
1761 $hash{"$key\0"} = "$value\0" ;
1762
1763Similarly the NULL needs to be taken into account when you are considering
1764the length of existing keys/values.
1765
1766It would be much better if you could ignore the NULL termination issue
1767in the main application code and have a mechanism that automatically
1768added the terminating NULL to all keys and values whenever you write to
1769the database and have them removed when you read from the database. As I'm
1770sure you have already guessed, this is a problem that DBM Filters can
1771fix very easily.
1772
3245f058 1773 use warnings ;
cad2e5aa
JH
1774 use strict ;
1775 use DB_File ;
1776
1777 my %hash ;
1778 my $filename = "/tmp/filt" ;
1779 unlink $filename ;
1780
1781 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1782 or die "Cannot open $filename: $!\n" ;
1783
1784 # Install DBM Filters
1785 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1786 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1787 $db->filter_fetch_value( sub { s/\0$// } ) ;
1788 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1789
1790 $hash{"abc"} = "def" ;
1791 my $a = $hash{"ABC"} ;
1792 # ...
1793 undef $db ;
1794 untie %hash ;
1795
1796Hopefully the contents of each of the filters should be
1797self-explanatory. Both "fetch" filters remove the terminating NULL,
1798and both "store" filters add a terminating NULL.
1799
1800
1801=head2 Another Example -- Key is a C int.
1802
1803Here is another real-life example. By default, whenever Perl writes to
1804a DBM database it always writes the key and value as strings. So when
1805you use this:
1806
1807 $hash{12345} = "something" ;
1808
1809the key 12345 will get stored in the DBM database as the 5 byte string
1810"12345". If you actually want the key to be stored in the DBM database
1811as a C int, you will have to use C<pack> when writing, and C<unpack>
1812when reading.
1813
1814Here is a DBM Filter that does it:
1815
3245f058 1816 use warnings ;
cad2e5aa
JH
1817 use strict ;
1818 use DB_File ;
1819 my %hash ;
1820 my $filename = "/tmp/filt" ;
1821 unlink $filename ;
1822
1823
1824 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1825 or die "Cannot open $filename: $!\n" ;
1826
1827 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1828 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1829 $hash{123} = "def" ;
1830 # ...
1831 undef $db ;
1832 untie %hash ;
1833
1834This time only two filters have been used -- we only need to manipulate
1835the contents of the key, so it wasn't necessary to install any value
1836filters.
1837
f6b705ef 1838=head1 HINTS AND TIPS
3b35bae3 1839
3b35bae3 1840
b90e71be 1841=head2 Locking: The Trouble with fd
3b35bae3 1842
b90e71be
GS
1843Until version 1.72 of this module, the recommended technique for locking
1844B<DB_File> databases was to flock the filehandle returned from the "fd"
1845function. Unfortunately this technique has been shown to be fundamentally
1846flawed (Kudos to David Harris for tracking this down). Use it at your own
1847peril!
3b35bae3 1848
b90e71be 1849The locking technique went like this.
cb1a09d0 1850
b90e71be
GS
1851 $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644)
1852 || die "dbcreat /tmp/foo.db $!";
1853 $fd = $db->fd;
1854 open(DB_FH, "+<&=$fd") || die "dup $!";
1855 flock (DB_FH, LOCK_EX) || die "flock: $!";
1856 ...
1857 $db{"Tom"} = "Jerry" ;
1858 ...
1859 flock(DB_FH, LOCK_UN);
1860 undef $db;
1861 untie %db;
1862 close(DB_FH);
cb1a09d0 1863
b90e71be 1864In simple terms, this is what happens:
cb1a09d0 1865
b90e71be 1866=over 5
cb1a09d0 1867
b90e71be 1868=item 1.
cb1a09d0 1869
b90e71be 1870Use "tie" to open the database.
cb1a09d0 1871
b90e71be 1872=item 2.
cb1a09d0 1873
b90e71be 1874Lock the database with fd & flock.
cb1a09d0 1875
b90e71be 1876=item 3.
cb1a09d0 1877
b90e71be 1878Read & Write to the database.
cb1a09d0 1879
b90e71be 1880=item 4.
cb1a09d0 1881
b90e71be 1882Unlock and close the database.
cb1a09d0 1883
b90e71be
GS
1884=back
1885
1886Here is the crux of the problem. A side-effect of opening the B<DB_File>
1887database in step 1 is that an initial block from the database will get
1888read from disk and cached in memory.
1889
1890To see why this is a problem, consider what can happen when two processes,
1891say "A" and "B", both want to update the same B<DB_File> database
1892using the locking steps outlined above. Assume process "A" has already
1893opened the database and has a write lock, but it hasn't actually updated
1894the database yet (it has finished step 2, but not started step 3 yet). Now
1895process "B" tries to open the same database - step 1 will succeed,
1896but it will block on step 2 until process "A" releases the lock. The
1897important thing to notice here is that at this point in time both
1898processes will have cached identical initial blocks from the database.
1899
1900Now process "A" updates the database and happens to change some of the
1901data held in the initial buffer. Process "A" terminates, flushing
1902all cached data to disk and releasing the database lock. At this point
1903the database on disk will correctly reflect the changes made by process
1904"A".
1905
1906With the lock released, process "B" can now continue. It also updates the
1907database and unfortunately it too modifies the data that was in its
1908initial buffer. Once that data gets flushed to disk it will overwrite
1909some/all of the changes process "A" made to the database.
1910
1911The result of this scenario is at best a database that doesn't contain
1912what you expect. At worst the database will corrupt.
1913
1914The above won't happen every time competing processes update the same
1915B<DB_File> database, but it does illustrate why the technique should
1916not be used.
1917
1918=head2 Safe ways to lock a database
1919
1920Starting with version 2.x, Berkeley DB has internal support for locking.
1921The companion module to this one, B<BerkeleyDB>, provides an interface
1922to this locking functionality. If you are serious about locking
1923Berkeley DB databases, I strongly recommend using B<BerkeleyDB>.
1924
1925If using B<BerkeleyDB> isn't an option, there are a number of modules
1926available on CPAN that can be used to implement locking. Each one
1927implements locking differently and has different goals in mind. It is
1928therefore worth knowing the difference, so that you can pick the right
1929one for your application. Here are the three locking wrappers:
1930
1931=over 5
1932
1933=item B<Tie::DB_Lock>
1934
1935A B<DB_File> wrapper which creates copies of the database file for
1936read access, so that you have a kind of a multiversioning concurrent read
1937system. However, updates are still serial. Use for databases where reads
1938may be lengthy and consistency problems may occur.
1939
1940=item B<Tie::DB_LockFile>
1941
1942A B<DB_File> wrapper that has the ability to lock and unlock the database
1943while it is being used. Avoids the tie-before-flock problem by simply
1944re-tie-ing the database when you get or drop a lock. Because of the
1945flexibility in dropping and re-acquiring the lock in the middle of a
1946session, this can be massaged into a system that will work with long
1947updates and/or reads if the application follows the hints in the POD
1948documentation.
1949
1950=item B<DB_File::Lock>
1951
1952An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
1953before tie-ing the database and drops the lock after the untie. Allows
1954one to use the same lockfile for multiple databases to avoid deadlock
1955problems, if desired. Use for databases where updates and reads are
1956quick and simple flock locking semantics are enough.
1957
1958=back
cb1a09d0 1959
68dc0745 1960=head2 Sharing Databases With C Applications
f6b705ef 1961
1962There is no technical reason why a Berkeley DB database cannot be
1963shared by both a Perl and a C application.
1964
1965The vast majority of problems that are reported in this area boil down
1966to the fact that C strings are NULL terminated, whilst Perl strings are
cad2e5aa 1967not. See L<DBM FILTERS> for a generic way to work around this problem.
f6b705ef 1968
1969Here is a real example. Netscape 2.0 keeps a record of the locations you
1970visit along with the time you last visited them in a DB_HASH database.
1971This is usually stored in the file F<~/.netscape/history.db>. The key
1972field in the database is the location string and the value field is the
1973time the location was last visited stored as a 4 byte binary value.
1974
1975If you haven't already guessed, the location string is stored with a
1976terminating NULL. This means you need to be careful when accessing the
1977database.
1978
1979Here is a snippet of code that is loosely based on Tom Christiansen's
1980I<ggh> script (available from your nearest CPAN archive in
1981F<authors/id/TOMC/scripts/nshist.gz>).
1982
3245f058 1983 use warnings ;
610ab055 1984 use strict ;
f6b705ef 1985 use DB_File ;
1986 use Fcntl ;
f6b705ef 1987
610ab055 1988 use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ;
f6b705ef 1989 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
1990
1991 $HISTORY = "$dotdir/.netscape/history.db";
1992
1993 tie %hist_db, 'DB_File', $HISTORY
1994 or die "Cannot open $HISTORY: $!\n" ;
1995
1996 # Dump the complete database
1997 while ( ($href, $binary_time) = each %hist_db ) {
1998
1999 # remove the terminating NULL
2000 $href =~ s/\x00$// ;
2001
2002 # convert the binary time into a user friendly string
2003 $date = localtime unpack("V", $binary_time);
2004 print "$date $href\n" ;
2005 }
2006
2007 # check for the existence of a specific key
2008 # remember to add the NULL
2009 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
2010 $date = localtime unpack("V", $binary_time) ;
2011 print "Last visited mox.perl.com on $date\n" ;
2012 }
2013 else {
2014 print "Never visited mox.perl.com\n"
2015 }
2016
2017 untie %hist_db ;
2018
68dc0745 2019=head2 The untie() Gotcha
778183f3 2020
7a2e2cd6 2021If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 2022recommended that you read L<perltie/The untie Gotcha>.
778183f3
PM
2023
2024Even if you don't currently make use of the API interface, it is still
2025worth reading it.
2026
2027Here is an example which illustrates the problem from a B<DB_File>
2028perspective:
2029
2030 use DB_File ;
2031 use Fcntl ;
2032
2033 my %x ;
2034 my $X ;
2035
2036 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
2037 or die "Cannot tie first time: $!" ;
2038
2039 $x{123} = 456 ;
2040
2041 untie %x ;
2042
2043 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2044 or die "Cannot tie second time: $!" ;
2045
2046 untie %x ;
2047
2048When run, the script will produce this error message:
2049
2050 Cannot tie second time: Invalid argument at bad.file line 14.
2051
2052Although the error message above refers to the second tie() statement
2053in the script, the source of the problem is really with the untie()
2054statement that precedes it.
2055
2056Having read L<perltie> you will probably have already guessed that the
2057error is caused by the extra copy of the tied object stored in C<$X>.
2058If you haven't, then the problem boils down to the fact that the
2059B<DB_File> destructor, DESTROY, will not be called until I<all>
2060references to the tied object are destroyed. Both the tied variable,
2061C<%x>, and C<$X> above hold a reference to the object. The call to
2062untie() will destroy the first, but C<$X> still holds a valid
2063reference, so the destructor will not get called and the database file
2064F<tst.fil> will remain open. The fact that Berkeley DB then reports the
b90e71be 2065attempt to open a database that is already open via the catch-all
778183f3
PM
2066"Invalid argument" doesn't help.
2067
2068If you run the script with the C<-w> flag the error message becomes:
2069
2070 untie attempted while 1 inner references still exist at bad.file line 12.
2071 Cannot tie second time: Invalid argument at bad.file line 14.
2072
2073which pinpoints the real problem. Finally the script can now be
2074modified to fix the original problem by destroying the API object
2075before the untie:
2076
2077 ...
2078 $x{123} = 456 ;
2079
2080 undef $X ;
2081 untie %x ;
2082
2083 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2084 ...
2085
f6b705ef 2086
2087=head1 COMMON QUESTIONS
2088
2089=head2 Why is there Perl source in my database?
2090
2091If you look at the contents of a database file created by DB_File,
2092there can sometimes be part of a Perl script included in it.
2093
2094This happens because Berkeley DB uses dynamic memory to allocate
2095buffers which will subsequently be written to the database file. Being
2096dynamic, the memory could have been used for anything before DB
2097malloced it. As Berkeley DB doesn't clear the memory once it has been
2098allocated, the unused portions will contain random junk. In the case
2099where a Perl script gets written to the database, the random junk will
2100correspond to an area of dynamic memory that happened to be used during
2101the compilation of the script.
2102
2103Unless you don't like the possibility of there being part of your Perl
2104scripts embedded in a database file, this is nothing to worry about.
2105
2106=head2 How do I store complex data structures with DB_File?
2107
2108Although B<DB_File> cannot do this directly, there is a module which
2109can layer transparently over B<DB_File> to accomplish this feat.
2110
2111Check out the MLDBM module, available on CPAN in the directory
2112F<modules/by-module/MLDBM>.
2113
2114=head2 What does "Invalid Argument" mean?
2115
2116You will get this error message when one of the parameters in the
2117C<tie> call is wrong. Unfortunately there are quite a few parameters to
2118get wrong, so it can be difficult to figure out which one it is.
2119
2120Here are a couple of possibilities:
2121
2122=over 5
2123
2124=item 1.
2125
610ab055 2126Attempting to reopen a database without closing it.
f6b705ef 2127
2128=item 2.
2129
2130Using the O_WRONLY flag.
2131
2132=back
2133
2134=head2 What does "Bareword 'DB_File' not allowed" mean?
2135
2136You will encounter this particular error message when you have the
2137C<strict 'subs'> pragma (or the full strict pragma) in your script.
2138Consider this script:
2139
3245f058 2140 use warnings ;
f6b705ef 2141 use strict ;
2142 use DB_File ;
2143 use vars qw(%x) ;
2144 tie %x, DB_File, "filename" ;
2145
2146Running it produces the error in question:
2147
2148 Bareword "DB_File" not allowed while "strict subs" in use
2149
2150To get around the error, place the word C<DB_File> in either single or
2151double quotes, like this:
2152
2153 tie %x, "DB_File", "filename" ;
2154
2155Although it might seem like a real pain, it is really worth the effort
2156of having a C<use strict> in all your scripts.
2157
cad2e5aa
JH
2158=head1 REFERENCES
2159
2160Articles that are either about B<DB_File> or make use of it.
2161
2162=over 5
2163
2164=item 1.
2165
2166I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
2167Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
2168
2169=back
2170
cb1a09d0
AD
2171=head1 HISTORY
2172
1f70e1ea 2173Moved to the Changes file.
610ab055 2174
1f70e1ea 2175=head1 BUGS
05475680 2176
1f70e1ea
PM
2177Some older versions of Berkeley DB had problems with fixed length
2178records using the RECNO file format. This problem has been fixed since
2179version 1.85 of Berkeley DB.
e858de61 2180
1f70e1ea
PM
2181I am sure there are bugs in the code. If you do find any, or can
2182suggest any enhancements, I would welcome your comments.
a6ed719b 2183
1f70e1ea 2184=head1 AVAILABILITY
a6ed719b 2185
1f70e1ea
PM
2186B<DB_File> comes with the standard Perl source distribution. Look in
2187the directory F<ext/DB_File>. Given the amount of time between releases
2188of Perl the version that ships with Perl is quite likely to be out of
2189date. The most recent version can always be found on CPAN (see
2190L<perlmod/CPAN> for details), in the directory
2191F<modules/by-module/DB_File>.
a6ed719b 2192
039d031f
PM
2193This version of B<DB_File> will work with either version 1.x, 2.x or
21943.x of Berkeley DB, but is limited to the functionality provided by
2195version 1.
a6ed719b 2196
cad2e5aa 2197The official web site for Berkeley DB is F<http://www.sleepycat.com>.
039d031f 2198All versions of Berkeley DB are available there.
93af7a87 2199
1f70e1ea
PM
2200Alternatively, Berkeley DB version 1 is available at your nearest CPAN
2201archive in F<src/misc/db.1.85.tar.gz>.
e858de61 2202
1f70e1ea
PM
2203If you are running IRIX, then get Berkeley DB version 1 from
2204F<http://reality.sgi.com/ariel>. It has the patches necessary to
2205compile properly on IRIX 5.3.
610ab055 2206
1f70e1ea 2207=head1 COPYRIGHT
3b35bae3 2208
c5da4faf 2209Copyright (c) 1995-2001 Paul Marquess. All rights reserved. This program
a9fd575d
PM
2210is free software; you can redistribute it and/or modify it under the
2211same terms as Perl itself.
3b35bae3 2212
1f70e1ea
PM
2213Although B<DB_File> is covered by the Perl license, the library it
2214makes use of, namely Berkeley DB, is not. Berkeley DB has its own
2215copyright and its own license. Please take the time to read it.
3b35bae3 2216
a9fd575d 2217Here are a few words taken from the Berkeley DB FAQ (at
b90e71be 2218F<http://www.sleepycat.com>) regarding the license:
68dc0745 2219
a9fd575d 2220 Do I have to license DB to use it in Perl scripts?
3b35bae3 2221
a9fd575d
PM
2222 No. The Berkeley DB license requires that software that uses
2223 Berkeley DB be freely redistributable. In the case of Perl, that
2224 software is Perl, and not your scripts. Any Perl scripts that you
2225 write are your property, including scripts that make use of
2226 Berkeley DB. Neither the Perl license nor the Berkeley DB license
2227 place any restriction on what you may do with them.
88108326 2228
1f70e1ea
PM
2229If you are in any doubt about the license situation, contact either the
2230Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1
PM
2231
2232
3b35bae3
AD
2233=head1 SEE ALSO
2234
9fe6733a
PM
2235L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
2236L<dbmfilter>
3b35bae3 2237
3b35bae3
AD
2238=head1 AUTHOR
2239
8e07c86e 2240The DB_File interface was written by Paul Marquess
6ca2e664 2241E<lt>Paul.Marquess@btinternet.comE<gt>.
d3ef3b8a
PM
2242Questions about the DB system itself may be addressed to
2243E<lt>db@sleepycat.comE<gt>.
3b35bae3
AD
2244
2245=cut