This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Update DB_File from CPAN version 1.838 to 1.840.
[perl5.git] / cpan / DB_File / DB_File.pm
CommitLineData
a0d0e21e
LW
1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
b6990ae0 3# Written by Paul Marquess (pmqs@cpan.org)
36477c24 4#
c7cd1ed9 5# Copyright (c) 1995-2016 Paul Marquess. All rights reserved.
36477c24
PP
6# This program is free software; you can redistribute it and/or
7# modify it under the same terms as Perl itself.
8
8e07c86e
AD
9
10package DB_File::HASHINFO ;
785da04d 11
a56f0d08 12require 5.008003;
610ab055 13
3245f058 14use warnings;
785da04d 15use strict;
8e07c86e 16use Carp;
88108326
PP
17require Tie::Hash;
18@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 19
88108326 20sub new
8e07c86e 21{
88108326
PP
22 my $pkg = shift ;
23 my %x ;
24 tie %x, $pkg ;
25 bless \%x, $pkg ;
8e07c86e
AD
26}
27
610ab055 28
88108326
PP
29sub TIEHASH
30{
31 my $pkg = shift ;
32
efc79c7d
PM
33 bless { VALID => {
34 bsize => 1,
35 ffactor => 1,
36 nelem => 1,
37 cachesize => 1,
38 hash => 2,
39 lorder => 1,
36477c24
PP
40 },
41 GOT => {}
42 }, $pkg ;
88108326 43}
8e07c86e 44
610ab055 45
8e07c86e
AD
46sub FETCH
47{
88108326
PP
48 my $self = shift ;
49 my $key = shift ;
8e07c86e 50
36477c24 51 return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;
88108326
PP
52
53 my $pkg = ref $self ;
54 croak "${pkg}::FETCH - Unknown element '$key'" ;
8e07c86e
AD
55}
56
57
58sub STORE
59{
88108326
PP
60 my $self = shift ;
61 my $key = shift ;
62 my $value = shift ;
63
efc79c7d
PM
64 my $type = $self->{VALID}{$key};
65
66 if ( $type )
8e07c86e 67 {
efc79c7d
PM
68 croak "Key '$key' not associated with a code reference"
69 if $type == 2 && !ref $value && ref $value ne 'CODE';
36477c24 70 $self->{GOT}{$key} = $value ;
8e07c86e
AD
71 return ;
72 }
73
88108326
PP
74 my $pkg = ref $self ;
75 croak "${pkg}::STORE - Unknown element '$key'" ;
8e07c86e
AD
76}
77
78sub DELETE
79{
88108326
PP
80 my $self = shift ;
81 my $key = shift ;
82
36477c24 83 if ( exists $self->{VALID}{$key} )
8e07c86e 84 {
36477c24 85 delete $self->{GOT}{$key} ;
8e07c86e
AD
86 return ;
87 }
88
88108326
PP
89 my $pkg = ref $self ;
90 croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ;
8e07c86e
AD
91}
92
88108326 93sub EXISTS
8e07c86e 94{
88108326
PP
95 my $self = shift ;
96 my $key = shift ;
8e07c86e 97
36477c24 98 exists $self->{VALID}{$key} ;
8e07c86e
AD
99}
100
88108326 101sub NotHere
8e07c86e 102{
18d2dc8c 103 my $self = shift ;
88108326 104 my $method = shift ;
8e07c86e 105
18d2dc8c 106 croak ref($self) . " does not define the method ${method}" ;
8e07c86e
AD
107}
108
18d2dc8c
PM
109sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
110sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") }
111sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e
AD
112
113package DB_File::RECNOINFO ;
785da04d 114
3245f058 115use warnings;
88108326
PP
116use strict ;
117
045291aa 118@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e
AD
119
120sub TIEHASH
121{
88108326
PP
122 my $pkg = shift ;
123
36477c24
PP
124 bless { VALID => { map {$_, 1}
125 qw( bval cachesize psize flags lorder reclen bfname )
126 },
127 GOT => {},
128 }, $pkg ;
8e07c86e
AD
129}
130
88108326 131package DB_File::BTREEINFO ;
8e07c86e 132
3245f058 133use warnings;
88108326 134use strict ;
8e07c86e 135
88108326 136@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 137
88108326 138sub TIEHASH
8e07c86e 139{
88108326
PP
140 my $pkg = shift ;
141
efc79c7d
PM
142 bless { VALID => {
143 flags => 1,
144 cachesize => 1,
145 maxkeypage => 1,
146 minkeypage => 1,
147 psize => 1,
148 compare => 2,
149 prefix => 2,
150 lorder => 1,
36477c24
PP
151 },
152 GOT => {},
153 }, $pkg ;
8e07c86e
AD
154}
155
156
8e07c86e 157package DB_File ;
785da04d 158
3245f058 159use warnings;
785da04d 160use strict;
07200f1b 161our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO);
ebf49c8b 162our ($db_version, $use_XSLoader, $splice_end_array_no_length, $splice_end_array, $Error);
8e07c86e
AD
163use Carp;
164
785da04d 165
74f485aa 166$VERSION = "1.840" ;
083e9212 167$VERSION = eval $VERSION; # needed for dev releases
d85a743d
PM
168
169{
82c92bb0 170 local $SIG{__WARN__} = sub {$splice_end_array_no_length = join(" ",@_);};
d85a743d 171 my @a =(1); splice(@a, 3);
ebf49c8b
CBW
172 $splice_end_array_no_length =
173 ($splice_end_array_no_length =~ /^splice\(\) offset past end of array at /);
174}
175{
82c92bb0 176 local $SIG{__WARN__} = sub {$splice_end_array = join(" ", @_);};
ebf49c8b 177 my @a =(1); splice(@a, 3, 1);
d85a743d
PM
178 $splice_end_array =
179 ($splice_end_array =~ /^splice\(\) offset past end of array at /);
180}
8e07c86e
AD
181
182#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
88108326
PP
183$DB_BTREE = new DB_File::BTREEINFO ;
184$DB_HASH = new DB_File::HASHINFO ;
185$DB_RECNO = new DB_File::RECNOINFO ;
8e07c86e 186
785da04d 187require Tie::Hash;
8e07c86e 188require Exporter;
b90e71be
GS
189BEGIN {
190 $use_XSLoader = 1 ;
e5021521 191 { local $SIG{__DIE__} ; eval { require XSLoader } ; }
b90e71be
GS
192
193 if ($@) {
194 $use_XSLoader = 0 ;
195 require DynaLoader;
196 @ISA = qw(DynaLoader);
197 }
198}
199
200push @ISA, qw(Tie::Hash Exporter);
8e07c86e
AD
201@EXPORT = qw(
202 $DB_BTREE $DB_HASH $DB_RECNO
88108326 203
8e07c86e
AD
204 BTREEMAGIC
205 BTREEVERSION
206 DB_LOCK
207 DB_SHMEM
208 DB_TXN
209 HASHMAGIC
210 HASHVERSION
211 MAX_PAGE_NUMBER
212 MAX_PAGE_OFFSET
213 MAX_REC_NUMBER
214 RET_ERROR
215 RET_SPECIAL
216 RET_SUCCESS
217 R_CURSOR
218 R_DUP
219 R_FIRST
220 R_FIXEDLEN
221 R_IAFTER
222 R_IBEFORE
223 R_LAST
224 R_NEXT
225 R_NOKEY
226 R_NOOVERWRITE
227 R_PREV
228 R_RECNOSYNC
229 R_SETCURSOR
230 R_SNAPSHOT
231 __R_UNUSED
88108326 232
045291aa 233);
8e07c86e
AD
234
235sub AUTOLOAD {
785da04d 236 my($constname);
8e07c86e 237 ($constname = $AUTOLOAD) =~ s/.*:://;
07200f1b
PM
238 my ($error, $val) = constant($constname);
239 Carp::croak $error if $error;
57c77851
JS
240 no strict 'refs';
241 *{$AUTOLOAD} = sub { $val };
242 goto &{$AUTOLOAD};
07200f1b 243}
8e07c86e 244
f6b705ef 245
a6ed719b 246eval {
1f70e1ea
PM
247 # Make all Fcntl O_XXX constants available for importing
248 require Fcntl;
249 my @O = grep /^O_/, @Fcntl::EXPORT;
250 Fcntl->import(@O); # first we import what we want to export
251 push(@EXPORT, @O);
a6ed719b 252};
f6b705ef 253
b90e71be
GS
254if ($use_XSLoader)
255 { XSLoader::load("DB_File", $VERSION)}
256else
257 { bootstrap DB_File $VERSION }
8e07c86e 258
05475680 259sub tie_hash_or_array
610ab055
PM
260{
261 my (@arg) = @_ ;
05475680 262 my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;
610ab055 263
f1aa208b
RGS
264 use File::Spec;
265 $arg[1] = File::Spec->rel2abs($arg[1])
266 if defined $arg[1] ;
267
610ab055
PM
268 $arg[4] = tied %{ $arg[4] }
269 if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;
270
efc79c7d
PM
271 $arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2];
272 $arg[3] = 0666 if @arg >=4 && ! defined $arg[3];
273
9c095db2
PM
274 # make recno in Berkeley DB version 2 (or better) work like
275 # recno in version 1.
d6067fe3
SP
276 if ($db_version >= 4 and ! $tieHASH) {
277 $arg[2] |= O_CREAT();
278 }
279
1f70e1ea
PM
280 if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
281 $arg[1] and ! -e $arg[1]) {
282 open(FH, ">$arg[1]") or return undef ;
283 close FH ;
284 chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
285 }
286
05475680 287 DoTie_($tieHASH, @arg) ;
610ab055
PM
288}
289
05475680
PM
290sub TIEHASH
291{
292 tie_hash_or_array(@_) ;
293}
294
295sub TIEARRAY
296{
297 tie_hash_or_array(@_) ;
298}
88108326 299
045291aa
PM
300sub CLEAR
301{
1f70e1ea 302 my $self = shift;
3245f058 303 my $key = 0 ;
1f70e1ea
PM
304 my $value = "" ;
305 my $status = $self->seq($key, $value, R_FIRST());
306 my @keys;
307
308 while ($status == 0) {
309 push @keys, $key;
310 $status = $self->seq($key, $value, R_NEXT());
311 }
312 foreach $key (reverse @keys) {
313 my $s = $self->del($key);
314 }
315}
316
045291aa
PM
317sub EXTEND { }
318
319sub STORESIZE
320{
321 my $self = shift;
322 my $length = shift ;
323 my $current_length = $self->length() ;
324
325 if ($length < $current_length) {
326 my $key ;
327 for ($key = $current_length - 1 ; $key >= $length ; -- $key)
328 { $self->del($key) }
329 }
a9fd575d
PM
330 elsif ($length > $current_length) {
331 $self->put($length-1, "") ;
332 }
045291aa
PM
333}
334
c5da4faf
PM
335
336sub SPLICE
337{
338 my $self = shift;
339 my $offset = shift;
340 if (not defined $offset) {
d85a743d 341 warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
c5da4faf
PM
342 $offset = 0;
343 }
344
ebf49c8b 345 my $has_length = @_;
c5da4faf
PM
346 my $length = @_ ? shift : 0;
347 # Carping about definedness comes _after_ the OFFSET sanity check.
348 # This is so we get the same error messages as Perl's splice().
349 #
350
351 my @list = @_;
352
353 my $size = $self->FETCHSIZE();
354
355 # 'If OFFSET is negative then it start that far from the end of
356 # the array.'
357 #
358 if ($offset < 0) {
359 my $new_offset = $size + $offset;
360 if ($new_offset < 0) {
361 die "Modification of non-creatable array value attempted, "
362 . "subscript $offset";
363 }
364 $offset = $new_offset;
365 }
366
c5da4faf 367 if (not defined $length) {
d85a743d 368 warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
c5da4faf
PM
369 $length = 0;
370 }
371
d85a743d
PM
372 if ($offset > $size) {
373 $offset = $size;
374 warnings::warnif('misc', 'splice() offset past end of array')
ebf49c8b 375 if $has_length ? $splice_end_array : $splice_end_array_no_length;
d85a743d
PM
376 }
377
c5da4faf
PM
378 # 'If LENGTH is omitted, removes everything from OFFSET onward.'
379 if (not defined $length) {
380 $length = $size - $offset;
381 }
382
383 # 'If LENGTH is negative, leave that many elements off the end of
384 # the array.'
385 #
386 if ($length < 0) {
387 $length = $size - $offset + $length;
388
389 if ($length < 0) {
390 # The user must have specified a length bigger than the
391 # length of the array passed in. But perl's splice()
392 # doesn't catch this, it just behaves as for length=0.
393 #
394 $length = 0;
395 }
396 }
397
398 if ($length > $size - $offset) {
399 $length = $size - $offset;
400 }
401
402 # $num_elems holds the current number of elements in the database.
403 my $num_elems = $size;
404
405 # 'Removes the elements designated by OFFSET and LENGTH from an
406 # array,'...
407 #
408 my @removed = ();
409 foreach (0 .. $length - 1) {
410 my $old;
411 my $status = $self->get($offset, $old);
412 if ($status != 0) {
413 my $msg = "error from Berkeley DB on get($offset, \$old)";
414 if ($status == 1) {
415 $msg .= ' (no such element?)';
416 }
417 else {
418 $msg .= ": error status $status";
419 if (defined $! and $! ne '') {
420 $msg .= ", message $!";
421 }
422 }
423 die $msg;
424 }
425 push @removed, $old;
426
427 $status = $self->del($offset);
428 if ($status != 0) {
429 my $msg = "error from Berkeley DB on del($offset)";
430 if ($status == 1) {
431 $msg .= ' (no such element?)';
432 }
433 else {
434 $msg .= ": error status $status";
435 if (defined $! and $! ne '') {
436 $msg .= ", message $!";
437 }
438 }
439 die $msg;
440 }
441
442 -- $num_elems;
443 }
444
445 # ...'and replaces them with the elements of LIST, if any.'
446 my $pos = $offset;
447 while (defined (my $elem = shift @list)) {
448 my $old_pos = $pos;
449 my $status;
450 if ($pos >= $num_elems) {
451 $status = $self->put($pos, $elem);
452 }
453 else {
454 $status = $self->put($pos, $elem, $self->R_IBEFORE);
455 }
456
457 if ($status != 0) {
458 my $msg = "error from Berkeley DB on put($pos, $elem, ...)";
459 if ($status == 1) {
460 $msg .= ' (no such element?)';
461 }
462 else {
463 $msg .= ", error status $status";
464 if (defined $! and $! ne '') {
465 $msg .= ", message $!";
466 }
467 }
468 die $msg;
469 }
470
471 die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE"
472 if $old_pos != $pos;
473
474 ++ $pos;
475 ++ $num_elems;
476 }
477
478 if (wantarray) {
479 # 'In list context, returns the elements removed from the
480 # array.'
481 #
482 return @removed;
483 }
484 elsif (defined wantarray and not wantarray) {
485 # 'In scalar context, returns the last element removed, or
486 # undef if no elements are removed.'
487 #
488 if (@removed) {
489 my $last = pop @removed;
490 return "$last";
491 }
492 else {
493 return undef;
494 }
495 }
496 elsif (not defined wantarray) {
497 # Void context
498 }
499 else { die }
500}
501sub ::DB_File::splice { &SPLICE }
502
6ca2e664
PM
503sub find_dup
504{
505 croak "Usage: \$db->find_dup(key,value)\n"
506 unless @_ == 3 ;
507
508 my $db = shift ;
509 my ($origkey, $value_wanted) = @_ ;
510 my ($key, $value) = ($origkey, 0);
511 my ($status) = 0 ;
512
513 for ($status = $db->seq($key, $value, R_CURSOR() ) ;
514 $status == 0 ;
515 $status = $db->seq($key, $value, R_NEXT() ) ) {
516
517 return 0 if $key eq $origkey and $value eq $value_wanted ;
518 }
519
520 return $status ;
521}
522
523sub del_dup
524{
525 croak "Usage: \$db->del_dup(key,value)\n"
526 unless @_ == 3 ;
527
528 my $db = shift ;
529 my ($key, $value) = @_ ;
530 my ($status) = $db->find_dup($key, $value) ;
531 return $status if $status != 0 ;
532
533 $status = $db->del($key, R_CURSOR() ) ;
534 return $status ;
535}
536
88108326
PP
537sub get_dup
538{
539 croak "Usage: \$db->get_dup(key [,flag])\n"
540 unless @_ == 2 or @_ == 3 ;
541
542 my $db = shift ;
543 my $key = shift ;
544 my $flag = shift ;
f6b705ef 545 my $value = 0 ;
88108326
PP
546 my $origkey = $key ;
547 my $wantarray = wantarray ;
f6b705ef 548 my %values = () ;
88108326
PP
549 my @values = () ;
550 my $counter = 0 ;
f6b705ef 551 my $status = 0 ;
88108326 552
f6b705ef
PP
553 # iterate through the database until either EOF ($status == 0)
554 # or a different key is encountered ($key ne $origkey).
555 for ($status = $db->seq($key, $value, R_CURSOR()) ;
556 $status == 0 and $key eq $origkey ;
557 $status = $db->seq($key, $value, R_NEXT()) ) {
88108326 558
f6b705ef
PP
559 # save the value or count number of matches
560 if ($wantarray) {
561 if ($flag)
562 { ++ $values{$value} }
563 else
564 { push (@values, $value) }
565 }
566 else
567 { ++ $counter }
88108326 568
88108326
PP
569 }
570
f6b705ef 571 return ($wantarray ? ($flag ? %values : @values) : $counter) ;
88108326
PP
572}
573
574
a30cae0b
CBW
575sub STORABLE_freeze
576{
577 my $type = ref shift;
578 croak "Cannot freeze $type object\n";
579}
580
581sub STORABLE_thaw
582{
583 my $type = ref shift;
584 croak "Cannot thaw $type object\n";
585}
586
587
588
8e07c86e
AD
5891;
590__END__
591
3b35bae3
AD
592=head1 NAME
593
1f70e1ea 594DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3
AD
595
596=head1 SYNOPSIS
597
bbc7dcd2
MS
598 use DB_File;
599
88108326
PP
600 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
601 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
602 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 603
3b35bae3
AD
604 $status = $X->del($key [, $flags]) ;
605 $status = $X->put($key, $value [, $flags]) ;
606 $status = $X->get($key, $value [, $flags]) ;
760ac839 607 $status = $X->seq($key, $value, $flags) ;
3b35bae3
AD
608 $status = $X->sync([$flags]) ;
609 $status = $X->fd ;
760ac839 610
f6b705ef 611 # BTREE only
88108326
PP
612 $count = $X->get_dup($key) ;
613 @list = $X->get_dup($key) ;
614 %list = $X->get_dup($key, 1) ;
6ca2e664
PM
615 $status = $X->find_dup($key, $value) ;
616 $status = $X->del_dup($key, $value) ;
88108326 617
f6b705ef
PP
618 # RECNO only
619 $a = $X->length;
620 $a = $X->pop ;
621 $X->push(list);
622 $a = $X->shift;
623 $X->unshift(list);
c5da4faf 624 @r = $X->splice(offset, length, elements);
f6b705ef 625
cad2e5aa
JH
626 # DBM Filters
627 $old_filter = $db->filter_store_key ( sub { ... } ) ;
628 $old_filter = $db->filter_store_value( sub { ... } ) ;
629 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
630 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
631
3b35bae3
AD
632 untie %hash ;
633 untie @array ;
634
635=head1 DESCRIPTION
636
8e07c86e 637B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 638facilities provided by Berkeley DB version 1.x (if you have a newer
0d735f06 639version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>).
039d031f
PM
640It is assumed that you have a copy of the Berkeley DB manual pages at
641hand when reading this documentation. The interface defined here
642mirrors the Berkeley DB interface closely.
68dc0745 643
8e07c86e
AD
644Berkeley DB is a C library which provides a consistent interface to a
645number of database formats. B<DB_File> provides an interface to all
646three of the database types currently supported by Berkeley DB.
3b35bae3
AD
647
648The file types are:
649
650=over 5
651
88108326 652=item B<DB_HASH>
3b35bae3 653
88108326 654This database type allows arbitrary key/value pairs to be stored in data
8e07c86e
AD
655files. This is equivalent to the functionality provided by other
656hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
657the files created using DB_HASH are not compatible with any of the
658other packages mentioned.
3b35bae3 659
8e07c86e
AD
660A default hashing algorithm, which will be adequate for most
661applications, is built into Berkeley DB. If you do need to use your own
662hashing algorithm it is possible to write your own in Perl and have
663B<DB_File> use it instead.
3b35bae3 664
88108326
PP
665=item B<DB_BTREE>
666
667The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 668sorted, balanced binary tree.
3b35bae3 669
8e07c86e
AD
670As with the DB_HASH format, it is possible to provide a user defined
671Perl routine to perform the comparison of keys. By default, though, the
672keys are stored in lexical order.
3b35bae3 673
88108326 674=item B<DB_RECNO>
3b35bae3 675
8e07c86e
AD
676DB_RECNO allows both fixed-length and variable-length flat text files
677to be manipulated using the same key/value pair interface as in DB_HASH
678and DB_BTREE. In this case the key will consist of a record (line)
679number.
3b35bae3
AD
680
681=back
682
e5021521 683=head2 Using DB_File with Berkeley DB version 2 or greater
1f70e1ea
PM
684
685Although B<DB_File> is intended to be used with Berkeley DB version 1,
e5021521 686it can also be used with version 2, 3 or 4. In this case the interface is
1f70e1ea 687limited to the functionality provided by Berkeley DB 1.x. Anywhere the
e5021521 688version 2 or greater interface differs, B<DB_File> arranges for it to work
039d031f 689like version 1. This feature allows B<DB_File> scripts that were built
e5021521 690with version 1 to be migrated to version 2 or greater without any changes.
1f70e1ea
PM
691
692If you want to make use of the new features available in Berkeley DB
b90e71be 6932.x or greater, use the Perl module B<BerkeleyDB> instead.
1f70e1ea 694
e5021521
JH
695B<Note:> The database file format has changed multiple times in Berkeley
696DB version 2, 3 and 4. If you cannot recreate your databases, you
697must dump any existing databases with either the C<db_dump> or the
698C<db_dump185> utility that comes with Berkeley DB.
699Once you have rebuilt DB_File to use Berkeley DB version 2 or greater,
700your databases can be recreated using C<db_load>. Refer to the Berkeley DB
1f70e1ea
PM
701documentation for further details.
702
e5021521 703Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley
039d031f 704DB with DB_File.
1f70e1ea 705
68dc0745 706=head2 Interface to Berkeley DB
3b35bae3
AD
707
708B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e
AD
709in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
710allows B<DB_File> to access Berkeley DB files using either an
711associative array (for DB_HASH & DB_BTREE file types) or an ordinary
712array (for the DB_RECNO file type).
3b35bae3 713
88108326
PP
714In addition to the tie() interface, it is also possible to access most
715of the functions provided in the Berkeley DB API directly.
f6b705ef 716See L<THE API INTERFACE>.
3b35bae3 717
88108326 718=head2 Opening a Berkeley DB Database File
3b35bae3 719
8e07c86e 720Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 721Here is the C prototype for dbopen():
3b35bae3
AD
722
723 DB*
724 dbopen (const char * file, int flags, int mode,
725 DBTYPE type, const void * openinfo)
726
727The parameter C<type> is an enumeration which specifies which of the 3
728interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
729Depending on which of these is actually chosen, the final parameter,
730I<openinfo> points to a data structure which allows tailoring of the
731specific interface method.
732
8e07c86e 733This interface is handled slightly differently in B<DB_File>. Here is
88108326 734an equivalent call using B<DB_File>:
3b35bae3 735
88108326 736 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 737
8e07c86e
AD
738The C<filename>, C<flags> and C<mode> parameters are the direct
739equivalent of their dbopen() counterparts. The final parameter $DB_HASH
740performs the function of both the C<type> and C<openinfo> parameters in
741dbopen().
3b35bae3 742
88108326
PP
743In the example above $DB_HASH is actually a pre-defined reference to a
744hash object. B<DB_File> has three of these pre-defined references.
745Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 746
8e07c86e
AD
747The keys allowed in each of these pre-defined references is limited to
748the names used in the equivalent C structure. So, for example, the
749$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326
PP
750C<ffactor>, C<hash>, C<lorder> and C<nelem>.
751
752To change one of these elements, just assign to it like this:
753
754 $DB_HASH->{'cachesize'} = 10000 ;
755
756The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
757usually adequate for most applications. If you do need to create extra
758instances of these objects, constructors are available for each file
759type.
760
761Here are examples of the constructors and the valid options available
762for DB_HASH, DB_BTREE and DB_RECNO respectively.
763
764 $a = new DB_File::HASHINFO ;
765 $a->{'bsize'} ;
766 $a->{'cachesize'} ;
767 $a->{'ffactor'};
768 $a->{'hash'} ;
769 $a->{'lorder'} ;
770 $a->{'nelem'} ;
771
772 $b = new DB_File::BTREEINFO ;
773 $b->{'flags'} ;
774 $b->{'cachesize'} ;
775 $b->{'maxkeypage'} ;
776 $b->{'minkeypage'} ;
777 $b->{'psize'} ;
778 $b->{'compare'} ;
779 $b->{'prefix'} ;
780 $b->{'lorder'} ;
781
782 $c = new DB_File::RECNOINFO ;
783 $c->{'bval'} ;
784 $c->{'cachesize'} ;
785 $c->{'psize'} ;
786 $c->{'flags'} ;
787 $c->{'lorder'} ;
788 $c->{'reclen'} ;
789 $c->{'bfname'} ;
790
791The values stored in the hashes above are mostly the direct equivalent
792of their C counterpart. Like their C counterparts, all are set to a
f6b705ef 793default values - that means you don't have to set I<all> of the
88108326
PP
794values when you only want to change one. Here is an example:
795
796 $a = new DB_File::HASHINFO ;
797 $a->{'cachesize'} = 12345 ;
798 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
799
36477c24 800A few of the options need extra discussion here. When used, the C
88108326
PP
801equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
802to C functions. In B<DB_File> these keys are used to store references
803to Perl subs. Below are templates for each of the subs:
804
805 sub hash
806 {
807 my ($data) = @_ ;
808 ...
809 # return the hash value for $data
810 return $hash ;
811 }
3b35bae3 812
88108326
PP
813 sub compare
814 {
815 my ($key, $key2) = @_ ;
816 ...
817 # return 0 if $key1 eq $key2
818 # -1 if $key1 lt $key2
819 # 1 if $key1 gt $key2
820 return (-1 , 0 or 1) ;
821 }
3b35bae3 822
88108326
PP
823 sub prefix
824 {
825 my ($key, $key2) = @_ ;
826 ...
827 # return number of bytes of $key2 which are
828 # necessary to determine that it is greater than $key1
829 return $bytes ;
830 }
3b35bae3 831
f6b705ef
PP
832See L<Changing the BTREE sort order> for an example of using the
833C<compare> template.
88108326 834
36477c24 835If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 836C<bval>, you should check out L<The 'bval' Option>.
36477c24 837
88108326
PP
838=head2 Default Parameters
839
840It is possible to omit some or all of the final 4 parameters in the
841call to C<tie> and let them take default values. As DB_HASH is the most
842common file format used, the call:
843
844 tie %A, "DB_File", "filename" ;
845
846is equivalent to:
847
18d2dc8c 848 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326
PP
849
850It is also possible to omit the filename parameter as well, so the
851call:
852
853 tie %A, "DB_File" ;
854
855is equivalent to:
856
18d2dc8c 857 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 858
f6b705ef 859See L<In Memory Databases> for a discussion on the use of C<undef>
88108326
PP
860in place of a filename.
861
f6b705ef
PP
862=head2 In Memory Databases
863
864Berkeley DB allows the creation of in-memory databases by using NULL
865(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
866uses C<undef> instead of NULL to provide this functionality.
867
868=head1 DB_HASH
869
870The DB_HASH file format is probably the most commonly used of the three
871file formats that B<DB_File> supports. It is also very straightforward
872to use.
873
68dc0745 874=head2 A Simple Example
f6b705ef
PP
875
876This example shows how to create a database, add key/value pairs to the
877database, delete keys/value pairs and finally how to enumerate the
878contents of the database.
879
3245f058 880 use warnings ;
610ab055 881 use strict ;
f6b705ef 882 use DB_File ;
07200f1b 883 our (%h, $k, $v) ;
f6b705ef 884
2c2d71f5 885 unlink "fruit" ;
45a340cb 886 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH
f6b705ef
PP
887 or die "Cannot open file 'fruit': $!\n";
888
889 # Add a few key/value pairs to the file
890 $h{"apple"} = "red" ;
891 $h{"orange"} = "orange" ;
892 $h{"banana"} = "yellow" ;
893 $h{"tomato"} = "red" ;
894
895 # Check for existence of a key
896 print "Banana Exists\n\n" if $h{"banana"} ;
897
898 # Delete a key/value pair.
899 delete $h{"apple"} ;
900
901 # print the contents of the file
902 while (($k, $v) = each %h)
903 { print "$k -> $v\n" }
904
905 untie %h ;
906
907here is the output:
908
909 Banana Exists
bbc7dcd2 910
f6b705ef
PP
911 orange -> orange
912 tomato -> red
913 banana -> yellow
914
915Note that the like ordinary associative arrays, the order of the keys
916retrieved is in an apparently random order.
917
918=head1 DB_BTREE
919
920The DB_BTREE format is useful when you want to store data in a given
921order. By default the keys will be stored in lexical order, but as you
922will see from the example shown in the next section, it is very easy to
923define your own sorting function.
924
925=head2 Changing the BTREE sort order
926
927This script shows how to override the default sorting algorithm that
928BTREE uses. Instead of using the normal lexical ordering, a case
929insensitive compare function will be used.
88108326 930
3245f058 931 use warnings ;
610ab055 932 use strict ;
f6b705ef 933 use DB_File ;
610ab055
PM
934
935 my %h ;
f6b705ef
PP
936
937 sub Compare
938 {
939 my ($key1, $key2) = @_ ;
940 "\L$key1" cmp "\L$key2" ;
941 }
942
943 # specify the Perl sub that will do the comparison
944 $DB_BTREE->{'compare'} = \&Compare ;
945
2c2d71f5 946 unlink "tree" ;
45a340cb 947 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef
PP
948 or die "Cannot open file 'tree': $!\n" ;
949
950 # Add a key/value pair to the file
951 $h{'Wall'} = 'Larry' ;
952 $h{'Smith'} = 'John' ;
953 $h{'mouse'} = 'mickey' ;
954 $h{'duck'} = 'donald' ;
955
956 # Delete
957 delete $h{"duck"} ;
958
959 # Cycle through the keys printing them in order.
960 # Note it is not necessary to sort the keys as
961 # the btree will have kept them in order automatically.
962 foreach (keys %h)
963 { print "$_\n" }
964
965 untie %h ;
966
967Here is the output from the code above.
968
969 mouse
970 Smith
971 Wall
972
973There are a few point to bear in mind if you want to change the
974ordering in a BTREE database:
975
976=over 5
977
978=item 1.
979
980The new compare function must be specified when you create the database.
981
982=item 2.
983
984You cannot change the ordering once the database has been created. Thus
985you must use the same compare function every time you access the
88108326
PP
986database.
987
39793c41
PM
988=item 3
989
990Duplicate keys are entirely defined by the comparison function.
991In the case-insensitive example above, the keys: 'KEY' and 'key'
992would be considered duplicates, and assigning to the second one
52ffee89 993would overwrite the first. If duplicates are allowed for (with the
59e51af5 994R_DUP flag discussed below), only a single copy of duplicate keys
39793c41
PM
995is stored in the database --- so (again with example above) assigning
996three values to the keys: 'KEY', 'Key', and 'key' would leave just
997the first key: 'KEY' in the database with three values. For some
998situations this results in information loss, so care should be taken
999to provide fully qualified comparison functions when necessary.
1000For example, the above comparison routine could be modified to
1001additionally compare case-sensitively if two keys are equal in the
1002case insensitive comparison:
1003
1004 sub compare {
1005 my($key1, $key2) = @_;
1006 lc $key1 cmp lc $key2 ||
1007 $key1 cmp $key2;
1008 }
1009
1010And now you will only have duplicates when the keys themselves
1011are truly the same. (note: in versions of the db library prior to
1012about November 1996, such duplicate keys were retained so it was
1013possible to recover the original keys in sets of keys that
1014compared as equal).
1015
1016
f6b705ef
PP
1017=back
1018
68dc0745 1019=head2 Handling Duplicate Keys
f6b705ef
PP
1020
1021The BTREE file type optionally allows a single key to be associated
1022with an arbitrary number of values. This option is enabled by setting
1023the flags element of C<$DB_BTREE> to R_DUP when creating the database.
1024
88108326
PP
1025There are some difficulties in using the tied hash interface if you
1026want to manipulate a BTREE database with duplicate keys. Consider this
1027code:
1028
3245f058 1029 use warnings ;
610ab055 1030 use strict ;
88108326 1031 use DB_File ;
610ab055 1032
962cee9f 1033 my ($filename, %h) ;
610ab055 1034
88108326
PP
1035 $filename = "tree" ;
1036 unlink $filename ;
bbc7dcd2 1037
88108326
PP
1038 # Enable duplicate records
1039 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1040
45a340cb 1041 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1042 or die "Cannot open $filename: $!\n";
bbc7dcd2 1043
88108326
PP
1044 # Add some key/value pairs to the file
1045 $h{'Wall'} = 'Larry' ;
1046 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1047 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326
PP
1048 $h{'Smith'} = 'John' ;
1049 $h{'mouse'} = 'mickey' ;
1050
1051 # iterate through the associative array
1052 # and print each key/value pair.
2c2d71f5 1053 foreach (sort keys %h)
88108326
PP
1054 { print "$_ -> $h{$_}\n" }
1055
f6b705ef
PP
1056 untie %h ;
1057
88108326
PP
1058Here is the output:
1059
1060 Smith -> John
1061 Wall -> Larry
1062 Wall -> Larry
f6b705ef 1063 Wall -> Larry
88108326
PP
1064 mouse -> mickey
1065
f6b705ef 1066As you can see 3 records have been successfully created with key C<Wall>
88108326 1067- the only thing is, when they are retrieved from the database they
f6b705ef
PP
1068I<seem> to have the same value, namely C<Larry>. The problem is caused
1069by the way that the associative array interface works. Basically, when
1070the associative array interface is used to fetch the value associated
1071with a given key, it will only ever retrieve the first value.
88108326
PP
1072
1073Although it may not be immediately obvious from the code above, the
1074associative array interface can be used to write values with duplicate
1075keys, but it cannot be used to read them back from the database.
1076
1077The way to get around this problem is to use the Berkeley DB API method
1078called C<seq>. This method allows sequential access to key/value
f6b705ef
PP
1079pairs. See L<THE API INTERFACE> for details of both the C<seq> method
1080and the API in general.
88108326
PP
1081
1082Here is the script above rewritten using the C<seq> API method.
1083
3245f058 1084 use warnings ;
610ab055 1085 use strict ;
88108326 1086 use DB_File ;
bbc7dcd2 1087
962cee9f 1088 my ($filename, $x, %h, $status, $key, $value) ;
610ab055 1089
88108326
PP
1090 $filename = "tree" ;
1091 unlink $filename ;
bbc7dcd2 1092
88108326
PP
1093 # Enable duplicate records
1094 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1095
45a340cb 1096 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1097 or die "Cannot open $filename: $!\n";
bbc7dcd2 1098
88108326
PP
1099 # Add some key/value pairs to the file
1100 $h{'Wall'} = 'Larry' ;
1101 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1102 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326
PP
1103 $h{'Smith'} = 'John' ;
1104 $h{'mouse'} = 'mickey' ;
bbc7dcd2 1105
f6b705ef 1106 # iterate through the btree using seq
88108326 1107 # and print each key/value pair.
610ab055 1108 $key = $value = 0 ;
f6b705ef
PP
1109 for ($status = $x->seq($key, $value, R_FIRST) ;
1110 $status == 0 ;
1111 $status = $x->seq($key, $value, R_NEXT) )
88108326 1112 { print "$key -> $value\n" }
bbc7dcd2 1113
88108326
PP
1114 undef $x ;
1115 untie %h ;
1116
1117that prints:
1118
1119 Smith -> John
1120 Wall -> Brick
f6b705ef 1121 Wall -> Brick
88108326
PP
1122 Wall -> Larry
1123 mouse -> mickey
1124
f6b705ef 1125This time we have got all the key/value pairs, including the multiple
88108326
PP
1126values associated with the key C<Wall>.
1127
6ca2e664
PM
1128To make life easier when dealing with duplicate keys, B<DB_File> comes with
1129a few utility methods.
1130
68dc0745 1131=head2 The get_dup() Method
f6b705ef 1132
6ca2e664 1133The C<get_dup> method assists in
88108326
PP
1134reading duplicate values from BTREE databases. The method can take the
1135following forms:
1136
1137 $count = $x->get_dup($key) ;
1138 @list = $x->get_dup($key) ;
1139 %list = $x->get_dup($key, 1) ;
1140
1141In a scalar context the method returns the number of values associated
1142with the key, C<$key>.
1143
1144In list context, it returns all the values which match C<$key>. Note
f6b705ef 1145that the values will be returned in an apparently random order.
88108326 1146
7a2e2cd6
PP
1147In list context, if the second parameter is present and evaluates
1148TRUE, the method returns an associative array. The keys of the
1149associative array correspond to the values that matched in the BTREE
1150and the values of the array are a count of the number of times that
1151particular value occurred in the BTREE.
88108326 1152
f6b705ef 1153So assuming the database created above, we can use C<get_dup> like
88108326
PP
1154this:
1155
3245f058 1156 use warnings ;
2c2d71f5
JH
1157 use strict ;
1158 use DB_File ;
bbc7dcd2 1159
962cee9f 1160 my ($filename, $x, %h) ;
2c2d71f5
JH
1161
1162 $filename = "tree" ;
bbc7dcd2 1163
2c2d71f5
JH
1164 # Enable duplicate records
1165 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1166
45a340cb 1167 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
2c2d71f5
JH
1168 or die "Cannot open $filename: $!\n";
1169
610ab055 1170 my $cnt = $x->get_dup("Wall") ;
88108326
PP
1171 print "Wall occurred $cnt times\n" ;
1172
610ab055 1173 my %hash = $x->get_dup("Wall", 1) ;
88108326 1174 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 1175 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 1176
2c2d71f5 1177 my @list = sort $x->get_dup("Wall") ;
88108326
PP
1178 print "Wall => [@list]\n" ;
1179
f6b705ef 1180 @list = $x->get_dup("Smith") ;
88108326 1181 print "Smith => [@list]\n" ;
bbc7dcd2 1182
f6b705ef 1183 @list = $x->get_dup("Dog") ;
88108326
PP
1184 print "Dog => [@list]\n" ;
1185
1186
1187and it will print:
1188
f6b705ef 1189 Wall occurred 3 times
88108326 1190 Larry is there
f6b705ef
PP
1191 There are 2 Brick Walls
1192 Wall => [Brick Brick Larry]
88108326
PP
1193 Smith => [John]
1194 Dog => []
3b35bae3 1195
6ca2e664
PM
1196=head2 The find_dup() Method
1197
1198 $status = $X->find_dup($key, $value) ;
1199
b90e71be 1200This method checks for the existence of a specific key/value pair. If the
6ca2e664
PM
1201pair exists, the cursor is left pointing to the pair and the method
1202returns 0. Otherwise the method returns a non-zero value.
1203
1204Assuming the database from the previous example:
1205
3245f058 1206 use warnings ;
6ca2e664
PM
1207 use strict ;
1208 use DB_File ;
bbc7dcd2 1209
962cee9f 1210 my ($filename, $x, %h, $found) ;
6ca2e664 1211
07200f1b 1212 $filename = "tree" ;
bbc7dcd2 1213
6ca2e664
PM
1214 # Enable duplicate records
1215 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1216
45a340cb 1217 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664
PM
1218 or die "Cannot open $filename: $!\n";
1219
1220 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1221 print "Larry Wall is $found there\n" ;
bbc7dcd2 1222
6ca2e664
PM
1223 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
1224 print "Harry Wall is $found there\n" ;
bbc7dcd2 1225
6ca2e664
PM
1226 undef $x ;
1227 untie %h ;
1228
1229prints this
1230
2c2d71f5 1231 Larry Wall is there
6ca2e664
PM
1232 Harry Wall is not there
1233
1234
1235=head2 The del_dup() Method
1236
1237 $status = $X->del_dup($key, $value) ;
1238
1239This method deletes a specific key/value pair. It returns
12400 if they exist and have been deleted successfully.
1241Otherwise the method returns a non-zero value.
1242
b90e71be 1243Again assuming the existence of the C<tree> database
6ca2e664 1244
3245f058 1245 use warnings ;
6ca2e664
PM
1246 use strict ;
1247 use DB_File ;
bbc7dcd2 1248
962cee9f 1249 my ($filename, $x, %h, $found) ;
6ca2e664 1250
07200f1b 1251 $filename = "tree" ;
bbc7dcd2 1252
6ca2e664
PM
1253 # Enable duplicate records
1254 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1255
45a340cb 1256 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664
PM
1257 or die "Cannot open $filename: $!\n";
1258
1259 $x->del_dup("Wall", "Larry") ;
1260
1261 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1262 print "Larry Wall is $found there\n" ;
bbc7dcd2 1263
6ca2e664
PM
1264 undef $x ;
1265 untie %h ;
1266
1267prints this
1268
1269 Larry Wall is not there
1270
f6b705ef
PP
1271=head2 Matching Partial Keys
1272
1273The BTREE interface has a feature which allows partial keys to be
1274matched. This functionality is I<only> available when the C<seq> method
1275is used along with the R_CURSOR flag.
1276
1277 $x->seq($key, $value, R_CURSOR) ;
1278
1279Here is the relevant quote from the dbopen man page where it defines
1280the use of the R_CURSOR flag with seq:
1281
f6b705ef
PP
1282 Note, for the DB_BTREE access method, the returned key is not
1283 necessarily an exact match for the specified key. The returned key
1284 is the smallest key greater than or equal to the specified key,
1285 permitting partial key matches and range searches.
1286
f6b705ef
PP
1287In the example script below, the C<match> sub uses this feature to find
1288and print the first matching key/value pair given a partial key.
1289
3245f058 1290 use warnings ;
610ab055 1291 use strict ;
f6b705ef
PP
1292 use DB_File ;
1293 use Fcntl ;
610ab055 1294
962cee9f 1295 my ($filename, $x, %h, $st, $key, $value) ;
f6b705ef
PP
1296
1297 sub match
1298 {
1299 my $key = shift ;
610ab055 1300 my $value = 0;
f6b705ef
PP
1301 my $orig_key = $key ;
1302 $x->seq($key, $value, R_CURSOR) ;
1303 print "$orig_key\t-> $key\t-> $value\n" ;
1304 }
1305
1306 $filename = "tree" ;
1307 unlink $filename ;
1308
45a340cb 1309 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 1310 or die "Cannot open $filename: $!\n";
bbc7dcd2 1311
f6b705ef
PP
1312 # Add some key/value pairs to the file
1313 $h{'mouse'} = 'mickey' ;
1314 $h{'Wall'} = 'Larry' ;
1315 $h{'Walls'} = 'Brick' ;
1316 $h{'Smith'} = 'John' ;
bbc7dcd2 1317
f6b705ef 1318
610ab055 1319 $key = $value = 0 ;
f6b705ef
PP
1320 print "IN ORDER\n" ;
1321 for ($st = $x->seq($key, $value, R_FIRST) ;
1322 $st == 0 ;
1323 $st = $x->seq($key, $value, R_NEXT) )
bbc7dcd2 1324
2c2d71f5 1325 { print "$key -> $value\n" }
bbc7dcd2 1326
f6b705ef
PP
1327 print "\nPARTIAL MATCH\n" ;
1328
1329 match "Wa" ;
1330 match "A" ;
1331 match "a" ;
1332
1333 undef $x ;
1334 untie %h ;
1335
1336Here is the output:
1337
1338 IN ORDER
1339 Smith -> John
1340 Wall -> Larry
1341 Walls -> Brick
1342 mouse -> mickey
1343
1344 PARTIAL MATCH
1345 Wa -> Wall -> Larry
1346 A -> Smith -> John
1347 a -> mouse -> mickey
1348
1349=head1 DB_RECNO
1350
1351DB_RECNO provides an interface to flat text files. Both variable and
1352fixed length records are supported.
3b35bae3 1353
6ca2e664 1354In order to make RECNO more compatible with Perl, the array offset for
88108326 1355all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1356
88108326
PP
1357As with normal Perl arrays, a RECNO array can be accessed using
1358negative indexes. The index -1 refers to the last element of the array,
1359-2 the second last, and so on. Attempting to access an element before
1360the start of the array will raise a fatal run-time error.
3b35bae3 1361
68dc0745 1362=head2 The 'bval' Option
36477c24
PP
1363
1364The operation of the bval option warrants some discussion. Here is the
1365definition of bval from the Berkeley DB 1.85 recno manual page:
1366
1367 The delimiting byte to be used to mark the end of a
1368 record for variable-length records, and the pad charac-
1369 ter for fixed-length records. If no value is speci-
1370 fied, newlines (``\n'') are used to mark the end of
1371 variable-length records and fixed-length records are
1372 padded with spaces.
1373
1374The second sentence is wrong. In actual fact bval will only default to
1375C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1376openinfo parameter is used at all, the value that happens to be in bval
1377will be used. That means you always have to specify bval when making
1378use of any of the options in the openinfo parameter. This documentation
1379error will be fixed in the next release of Berkeley DB.
1380
1381That clarifies the situation with regards Berkeley DB itself. What
1382about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1383quite useful, so B<DB_File> conforms to it.
36477c24
PP
1384
1385That means that you can specify other options (e.g. cachesize) and
1386still have bval default to C<"\n"> for variable length records, and
1387space for fixed length records.
1388
c5da4faf 1389Also note that the bval option only allows you to specify a single byte
a6d6498e 1390as a delimiter.
c5da4faf 1391
f6b705ef 1392=head2 A Simple Example
3b35bae3 1393
6ca2e664
PM
1394Here is a simple example that uses RECNO (if you are using a version
1395of Perl earlier than 5.004_57 this example won't work -- see
1396L<Extra RECNO Methods> for a workaround).
f6b705ef 1397
3245f058 1398 use warnings ;
610ab055 1399 use strict ;
f6b705ef 1400 use DB_File ;
f6b705ef 1401
2c2d71f5
JH
1402 my $filename = "text" ;
1403 unlink $filename ;
1404
610ab055 1405 my @h ;
45a340cb 1406 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef
PP
1407 or die "Cannot open file 'text': $!\n" ;
1408
1409 # Add a few key/value pairs to the file
1410 $h[0] = "orange" ;
1411 $h[1] = "blue" ;
1412 $h[2] = "yellow" ;
1413
6ca2e664
PM
1414 push @h, "green", "black" ;
1415
1416 my $elements = scalar @h ;
1417 print "The array contains $elements entries\n" ;
1418
1419 my $last = pop @h ;
1420 print "popped $last\n" ;
1421
1422 unshift @h, "white" ;
1423 my $first = shift @h ;
1424 print "shifted $first\n" ;
1425
f6b705ef
PP
1426 # Check for existence of a key
1427 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1428
1429 # use a negative index
1430 print "The last element is $h[-1]\n" ;
1431 print "The 2nd last element is $h[-2]\n" ;
1432
1433 untie @h ;
3b35bae3 1434
f6b705ef
PP
1435Here is the output from the script:
1436
6ca2e664
PM
1437 The array contains 5 entries
1438 popped black
2c2d71f5 1439 shifted white
f6b705ef 1440 Element 1 Exists with value blue
6ca2e664
PM
1441 The last element is green
1442 The 2nd last element is yellow
f6b705ef 1443
6ca2e664 1444=head2 Extra RECNO Methods
f6b705ef 1445
045291aa 1446If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664
PM
1447array interface is quite limited. In the example script above
1448C<push>, C<pop>, C<shift>, C<unshift>
1449or determining the array length will not work with a tied array.
045291aa
PM
1450
1451To make the interface more useful for older versions of Perl, a number
1452of methods are supplied with B<DB_File> to simulate the missing array
1453operations. All these methods are accessed via the object returned from
1454the tie call.
f6b705ef
PP
1455
1456Here are the methods:
1457
1458=over 5
3b35bae3 1459
f6b705ef
PP
1460=item B<$X-E<gt>push(list) ;>
1461
1462Pushes the elements of C<list> to the end of the array.
1463
1464=item B<$value = $X-E<gt>pop ;>
1465
1466Removes and returns the last element of the array.
1467
1468=item B<$X-E<gt>shift>
1469
1470Removes and returns the first element of the array.
1471
1472=item B<$X-E<gt>unshift(list) ;>
1473
1474Pushes the elements of C<list> to the start of the array.
1475
1476=item B<$X-E<gt>length>
1477
1478Returns the number of elements in the array.
1479
c5da4faf
PM
1480=item B<$X-E<gt>splice(offset, length, elements);>
1481
a6d05634 1482Returns a splice of the array.
c5da4faf 1483
f6b705ef
PP
1484=back
1485
1486=head2 Another Example
1487
1488Here is a more complete example that makes use of some of the methods
1489described above. It also makes use of the API interface directly (see
1490L<THE API INTERFACE>).
1491
3245f058 1492 use warnings ;
f6b705ef 1493 use strict ;
962cee9f 1494 my (@h, $H, $file, $i) ;
f6b705ef
PP
1495 use DB_File ;
1496 use Fcntl ;
bbc7dcd2 1497
f6b705ef
PP
1498 $file = "text" ;
1499
1500 unlink $file ;
1501
45a340cb 1502 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1503 or die "Cannot open file $file: $!\n" ;
bbc7dcd2 1504
f6b705ef
PP
1505 # first create a text file to play with
1506 $h[0] = "zero" ;
1507 $h[1] = "one" ;
1508 $h[2] = "two" ;
1509 $h[3] = "three" ;
1510 $h[4] = "four" ;
1511
bbc7dcd2 1512
f6b705ef
PP
1513 # Print the records in order.
1514 #
1515 # The length method is needed here because evaluating a tied
1516 # array in a scalar context does not return the number of
1517 # elements in the array.
1518
1519 print "\nORIGINAL\n" ;
1520 foreach $i (0 .. $H->length - 1) {
1521 print "$i: $h[$i]\n" ;
1522 }
1523
1524 # use the push & pop methods
1525 $a = $H->pop ;
1526 $H->push("last") ;
1527 print "\nThe last record was [$a]\n" ;
1528
1529 # and the shift & unshift methods
1530 $a = $H->shift ;
1531 $H->unshift("first") ;
1532 print "The first record was [$a]\n" ;
1533
1534 # Use the API to add a new record after record 2.
1535 $i = 2 ;
1536 $H->put($i, "Newbie", R_IAFTER) ;
1537
1538 # and a new record before record 1.
1539 $i = 1 ;
1540 $H->put($i, "New One", R_IBEFORE) ;
1541
1542 # delete record 3
1543 $H->del(3) ;
1544
1545 # now print the records in reverse order
1546 print "\nREVERSE\n" ;
1547 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1548 { print "$i: $h[$i]\n" }
1549
1550 # same again, but use the API functions instead
1551 print "\nREVERSE again\n" ;
610ab055 1552 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef
PP
1553 for ($s = $H->seq($k, $v, R_LAST) ;
1554 $s == 0 ;
1555 $s = $H->seq($k, $v, R_PREV))
1556 { print "$k: $v\n" }
1557
1558 undef $H ;
1559 untie @h ;
1560
1561and this is what it outputs:
1562
1563 ORIGINAL
1564 0: zero
1565 1: one
1566 2: two
1567 3: three
1568 4: four
1569
1570 The last record was [four]
1571 The first record was [zero]
1572
1573 REVERSE
1574 5: last
1575 4: three
1576 3: Newbie
1577 2: one
1578 1: New One
1579 0: first
1580
1581 REVERSE again
1582 5: last
1583 4: three
1584 3: Newbie
1585 2: one
1586 1: New One
1587 0: first
1588
1589Notes:
1590
1591=over 5
1592
1593=item 1.
1594
1595Rather than iterating through the array, C<@h> like this:
1596
1597 foreach $i (@h)
1598
1599it is necessary to use either this:
1600
1601 foreach $i (0 .. $H->length - 1)
1602
1603or this:
1604
1605 for ($a = $H->get($k, $v, R_FIRST) ;
1606 $a == 0 ;
1607 $a = $H->get($k, $v, R_NEXT) )
1608
1609=item 2.
1610
1611Notice that both times the C<put> method was used the record index was
1612specified using a variable, C<$i>, rather than the literal value
1613itself. This is because C<put> will return the record number of the
1614inserted line via that parameter.
1615
1616=back
1617
1618=head1 THE API INTERFACE
3b35bae3
AD
1619
1620As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1621possible to make direct use of most of the API functions defined in the
8e07c86e 1622Berkeley DB documentation.
3b35bae3 1623
88108326 1624To do this you need to store a copy of the object returned from the tie.
3b35bae3 1625
88108326 1626 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1627
8e07c86e 1628Once you have done that, you can access the Berkeley DB API functions
88108326 1629as B<DB_File> methods directly like this:
3b35bae3
AD
1630
1631 $db->put($key, $value, R_NOOVERWRITE) ;
1632
88108326
PP
1633B<Important:> If you have saved a copy of the object returned from
1634C<tie>, the underlying database file will I<not> be closed until both
1635the tied variable is untied and all copies of the saved object are
610ab055 1636destroyed.
88108326
PP
1637
1638 use DB_File ;
1639 $db = tie %hash, "DB_File", "filename"
1640 or die "Cannot tie filename: $!" ;
1641 ...
1642 undef $db ;
1643 untie %hash ;
1644
9a2c4ce3 1645See L<The untie() Gotcha> for more details.
778183f3 1646
88108326
PP
1647All the functions defined in L<dbopen> are available except for
1648close() and dbopen() itself. The B<DB_File> method interface to the
1649supported functions have been implemented to mirror the way Berkeley DB
1650works whenever possible. In particular note that:
1651
1652=over 5
1653
1654=item *
1655
1656The methods return a status value. All return 0 on success.
1657All return -1 to signify an error and set C<$!> to the exact
1658error code. The return code 1 generally (but not always) means that the
1659key specified did not exist in the database.
1660
1661Other return codes are defined. See below and in the Berkeley DB
1662documentation for details. The Berkeley DB documentation should be used
1663as the definitive source.
1664
1665=item *
3b35bae3 1666
88108326
PP
1667Whenever a Berkeley DB function returns data via one of its parameters,
1668the equivalent B<DB_File> method does exactly the same.
3b35bae3 1669
88108326
PP
1670=item *
1671
1672If you are careful, it is possible to mix API calls with the tied
1673hash/array interface in the same piece of code. Although only a few of
1674the methods used to implement the tied interface currently make use of
1675the cursor, you should always assume that the cursor has been changed
1676any time the tied hash/array interface is used. As an example, this
1677code will probably not do what you expect:
1678
1679 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1680 or die "Cannot tie $filename: $!" ;
1681
1682 # Get the first key/value pair and set the cursor
1683 $X->seq($key, $value, R_FIRST) ;
1684
1685 # this line will modify the cursor
1686 $count = scalar keys %x ;
1687
1688 # Get the second key/value pair.
1689 # oops, it didn't, it got the last key/value pair!
1690 $X->seq($key, $value, R_NEXT) ;
1691
1692The code above can be rearranged to get around the problem, like this:
1693
1694 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1695 or die "Cannot tie $filename: $!" ;
1696
1697 # this line will modify the cursor
1698 $count = scalar keys %x ;
1699
1700 # Get the first key/value pair and set the cursor
1701 $X->seq($key, $value, R_FIRST) ;
1702
1703 # Get the second key/value pair.
1704 # worked this time.
1705 $X->seq($key, $value, R_NEXT) ;
1706
1707=back
1708
1709All the constants defined in L<dbopen> for use in the flags parameters
1710in the methods defined below are also available. Refer to the Berkeley
1711DB documentation for the precise meaning of the flags values.
1712
1713Below is a list of the methods available.
3b35bae3
AD
1714
1715=over 5
1716
f6b705ef 1717=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326
PP
1718
1719Given a key (C<$key>) this method reads the value associated with it
1720from the database. The value read from the database is returned in the
1721C<$value> parameter.
3b35bae3 1722
88108326 1723If the key does not exist the method returns 1.
3b35bae3 1724
88108326 1725No flags are currently defined for this method.
3b35bae3 1726
f6b705ef 1727=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1728
88108326
PP
1729Stores the key/value pair in the database.
1730
1731If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1732will have the record number of the inserted key/value pair set.
3b35bae3 1733
88108326
PP
1734Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1735R_SETCURSOR.
1736
f6b705ef 1737=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1738
88108326 1739Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1740
88108326
PP
1741A return code of 1 means that the requested key was not in the
1742database.
3b35bae3 1743
88108326 1744R_CURSOR is the only valid flag at present.
3b35bae3 1745
f6b705ef 1746=item B<$status = $X-E<gt>fd ;>
3b35bae3 1747
88108326 1748Returns the file descriptor for the underlying database.
3b35bae3 1749
b90e71be
GS
1750See L<Locking: The Trouble with fd> for an explanation for why you should
1751not use C<fd> to lock your database.
3b35bae3 1752
f6b705ef 1753=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1754
88108326
PP
1755This interface allows sequential retrieval from the database. See
1756L<dbopen> for full details.
1757
1758Both the C<$key> and C<$value> parameters will be set to the key/value
1759pair read from the database.
1760
1761The flags parameter is mandatory. The valid flag values are R_CURSOR,
1762R_FIRST, R_LAST, R_NEXT and R_PREV.
1763
f6b705ef 1764=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326
PP
1765
1766Flushes any cached buffers to disk.
1767
1768R_RECNOSYNC is the only valid flag at present.
3b35bae3
AD
1769
1770=back
1771
cad2e5aa
JH
1772=head1 DBM FILTERS
1773
c7cd1ed9
JH
1774A DBM Filter is a piece of code that is be used when you I<always> want to
1775make the same transformation to all keys and/or values in a DBM database.
1776An example is when you need to encode your data in UTF-8 before writing to
1777the database and then decode the UTF-8 when reading from the database file.
1778
1779There are two ways to use a DBM Filter.
1780
1781=over 5
1782
1783=item 1.
1784
1785Using the low-level API defined below.
1786
1787=item 2.
1788
1789Using the L<DBM_Filter> module.
1790This module hides the complexity of the API defined below and comes
1791with a number of "canned" filters that cover some of the common use-cases.
1792
1793=back
1794
1795Use of the L<DBM_Filter> module is recommended.
1796
1797=head2 DBM Filter Low-level API
cad2e5aa
JH
1798
1799There are four methods associated with DBM Filters. All work identically,
1800and each is used to install (or uninstall) a single DBM Filter. Each
1801expects a single parameter, namely a reference to a sub. The only
1802difference between them is the place that the filter is installed.
1803
1804To summarise:
1805
1806=over 5
1807
1808=item B<filter_store_key>
1809
1810If a filter has been installed with this method, it will be invoked
1811every time you write a key to a DBM database.
1812
1813=item B<filter_store_value>
1814
1815If a filter has been installed with this method, it will be invoked
1816every time you write a value to a DBM database.
1817
1818
1819=item B<filter_fetch_key>
1820
1821If a filter has been installed with this method, it will be invoked
1822every time you read a key from a DBM database.
1823
1824=item B<filter_fetch_value>
1825
1826If a filter has been installed with this method, it will be invoked
1827every time you read a value from a DBM database.
1828
1829=back
1830
1831You can use any combination of the methods, from none, to all four.
1832
1833All filter methods return the existing filter, if present, or C<undef>
1834in not.
1835
1836To delete a filter pass C<undef> to it.
1837
1838=head2 The Filter
1839
1840When each filter is called by Perl, a local copy of C<$_> will contain
1841the key or value to be filtered. Filtering is achieved by modifying
1842the contents of C<$_>. The return code from the filter is ignored.
1843
1844=head2 An Example -- the NULL termination problem.
1845
1846Consider the following scenario. You have a DBM database
1847that you need to share with a third-party C application. The C application
1848assumes that I<all> keys and values are NULL terminated. Unfortunately
1849when Perl writes to DBM databases it doesn't use NULL termination, so
1850your Perl application will have to manage NULL termination itself. When
1851you write to the database you will have to use something like this:
1852
1853 $hash{"$key\0"} = "$value\0" ;
1854
1855Similarly the NULL needs to be taken into account when you are considering
1856the length of existing keys/values.
1857
1858It would be much better if you could ignore the NULL terminations issue
1859in the main application code and have a mechanism that automatically
1860added the terminating NULL to all keys and values whenever you write to
1861the database and have them removed when you read from the database. As I'm
1862sure you have already guessed, this is a problem that DBM Filters can
1863fix very easily.
1864
3245f058 1865 use warnings ;
cad2e5aa
JH
1866 use strict ;
1867 use DB_File ;
1868
1869 my %hash ;
2359510d 1870 my $filename = "filt" ;
cad2e5aa
JH
1871 unlink $filename ;
1872
1873 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1874 or die "Cannot open $filename: $!\n" ;
1875
1876 # Install DBM Filters
1877 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1878 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1879 $db->filter_fetch_value( sub { s/\0$// } ) ;
1880 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1881
1882 $hash{"abc"} = "def" ;
1883 my $a = $hash{"ABC"} ;
1884 # ...
1885 undef $db ;
1886 untie %hash ;
1887
1888Hopefully the contents of each of the filters should be
1889self-explanatory. Both "fetch" filters remove the terminating NULL,
1890and both "store" filters add a terminating NULL.
1891
1892
1893=head2 Another Example -- Key is a C int.
1894
1895Here is another real-life example. By default, whenever Perl writes to
1896a DBM database it always writes the key and value as strings. So when
1897you use this:
1898
3c4b39be 1899 $hash{12345} = "something" ;
cad2e5aa
JH
1900
1901the key 12345 will get stored in the DBM database as the 5 byte string
1902"12345". If you actually want the key to be stored in the DBM database
1903as a C int, you will have to use C<pack> when writing, and C<unpack>
1904when reading.
1905
1906Here is a DBM Filter that does it:
1907
3245f058 1908 use warnings ;
cad2e5aa
JH
1909 use strict ;
1910 use DB_File ;
1911 my %hash ;
2359510d 1912 my $filename = "filt" ;
cad2e5aa
JH
1913 unlink $filename ;
1914
1915
1916 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1917 or die "Cannot open $filename: $!\n" ;
1918
1919 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1920 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1921 $hash{123} = "def" ;
1922 # ...
1923 undef $db ;
1924 untie %hash ;
1925
1926This time only two filters have been used -- we only need to manipulate
1927the contents of the key, so it wasn't necessary to install any value
1928filters.
1929
f6b705ef 1930=head1 HINTS AND TIPS
3b35bae3 1931
3b35bae3 1932
b90e71be 1933=head2 Locking: The Trouble with fd
3b35bae3 1934
b90e71be
GS
1935Until version 1.72 of this module, the recommended technique for locking
1936B<DB_File> databases was to flock the filehandle returned from the "fd"
1937function. Unfortunately this technique has been shown to be fundamentally
1938flawed (Kudos to David Harris for tracking this down). Use it at your own
1939peril!
3b35bae3 1940
b90e71be 1941The locking technique went like this.
cb1a09d0 1942
2359510d
SD
1943 $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644)
1944 || die "dbcreat foo.db $!";
b90e71be
GS
1945 $fd = $db->fd;
1946 open(DB_FH, "+<&=$fd") || die "dup $!";
1947 flock (DB_FH, LOCK_EX) || die "flock: $!";
1948 ...
1949 $db{"Tom"} = "Jerry" ;
1950 ...
1951 flock(DB_FH, LOCK_UN);
1952 undef $db;
1953 untie %db;
1954 close(DB_FH);
cb1a09d0 1955
b90e71be 1956In simple terms, this is what happens:
cb1a09d0 1957
b90e71be 1958=over 5
cb1a09d0 1959
b90e71be 1960=item 1.
cb1a09d0 1961
b90e71be 1962Use "tie" to open the database.
cb1a09d0 1963
b90e71be 1964=item 2.
cb1a09d0 1965
b90e71be 1966Lock the database with fd & flock.
cb1a09d0 1967
b90e71be 1968=item 3.
cb1a09d0 1969
b90e71be 1970Read & Write to the database.
cb1a09d0 1971
b90e71be 1972=item 4.
cb1a09d0 1973
b90e71be 1974Unlock and close the database.
cb1a09d0 1975
b90e71be
GS
1976=back
1977
1978Here is the crux of the problem. A side-effect of opening the B<DB_File>
1979database in step 2 is that an initial block from the database will get
1980read from disk and cached in memory.
1981
1982To see why this is a problem, consider what can happen when two processes,
1983say "A" and "B", both want to update the same B<DB_File> database
1984using the locking steps outlined above. Assume process "A" has already
1985opened the database and has a write lock, but it hasn't actually updated
1986the database yet (it has finished step 2, but not started step 3 yet). Now
1987process "B" tries to open the same database - step 1 will succeed,
1988but it will block on step 2 until process "A" releases the lock. The
1989important thing to notice here is that at this point in time both
1990processes will have cached identical initial blocks from the database.
1991
1992Now process "A" updates the database and happens to change some of the
1993data held in the initial buffer. Process "A" terminates, flushing
1994all cached data to disk and releasing the database lock. At this point
1995the database on disk will correctly reflect the changes made by process
1996"A".
1997
1998With the lock released, process "B" can now continue. It also updates the
1999database and unfortunately it too modifies the data that was in its
2000initial buffer. Once that data gets flushed to disk it will overwrite
2001some/all of the changes process "A" made to the database.
2002
2003The result of this scenario is at best a database that doesn't contain
2004what you expect. At worst the database will corrupt.
2005
2006The above won't happen every time competing process update the same
2007B<DB_File> database, but it does illustrate why the technique should
2008not be used.
2009
2010=head2 Safe ways to lock a database
2011
2012Starting with version 2.x, Berkeley DB has internal support for locking.
2013The companion module to this one, B<BerkeleyDB>, provides an interface
2014to this locking functionality. If you are serious about locking
2015Berkeley DB databases, I strongly recommend using B<BerkeleyDB>.
2016
2017If using B<BerkeleyDB> isn't an option, there are a number of modules
2018available on CPAN that can be used to implement locking. Each one
2019implements locking differently and has different goals in mind. It is
2020therefore worth knowing the difference, so that you can pick the right
2021one for your application. Here are the three locking wrappers:
2022
2023=over 5
2024
2025=item B<Tie::DB_Lock>
2026
2027A B<DB_File> wrapper which creates copies of the database file for
2028read access, so that you have a kind of a multiversioning concurrent read
2029system. However, updates are still serial. Use for databases where reads
2030may be lengthy and consistency problems may occur.
2031
2032=item B<Tie::DB_LockFile>
2033
2034A B<DB_File> wrapper that has the ability to lock and unlock the database
2035while it is being used. Avoids the tie-before-flock problem by simply
2036re-tie-ing the database when you get or drop a lock. Because of the
2037flexibility in dropping and re-acquiring the lock in the middle of a
2038session, this can be massaged into a system that will work with long
2039updates and/or reads if the application follows the hints in the POD
2040documentation.
2041
2042=item B<DB_File::Lock>
2043
2044An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
2045before tie-ing the database and drops the lock after the untie. Allows
2046one to use the same lockfile for multiple databases to avoid deadlock
2047problems, if desired. Use for databases where updates are reads are
2048quick and simple flock locking semantics are enough.
2049
2050=back
cb1a09d0 2051
68dc0745 2052=head2 Sharing Databases With C Applications
f6b705ef
PP
2053
2054There is no technical reason why a Berkeley DB database cannot be
2055shared by both a Perl and a C application.
2056
2057The vast majority of problems that are reported in this area boil down
2058to the fact that C strings are NULL terminated, whilst Perl strings are
cad2e5aa 2059not. See L<DBM FILTERS> for a generic way to work around this problem.
f6b705ef
PP
2060
2061Here is a real example. Netscape 2.0 keeps a record of the locations you
2062visit along with the time you last visited them in a DB_HASH database.
2063This is usually stored in the file F<~/.netscape/history.db>. The key
2064field in the database is the location string and the value field is the
2065time the location was last visited stored as a 4 byte binary value.
2066
2067If you haven't already guessed, the location string is stored with a
2068terminating NULL. This means you need to be careful when accessing the
2069database.
2070
2071Here is a snippet of code that is loosely based on Tom Christiansen's
2072I<ggh> script (available from your nearest CPAN archive in
2073F<authors/id/TOMC/scripts/nshist.gz>).
2074
3245f058 2075 use warnings ;
610ab055 2076 use strict ;
f6b705ef
PP
2077 use DB_File ;
2078 use Fcntl ;
f6b705ef 2079
962cee9f 2080 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ;
f6b705ef
PP
2081 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
2082
2083 $HISTORY = "$dotdir/.netscape/history.db";
2084
2085 tie %hist_db, 'DB_File', $HISTORY
2086 or die "Cannot open $HISTORY: $!\n" ;;
2087
2088 # Dump the complete database
2089 while ( ($href, $binary_time) = each %hist_db ) {
2090
2091 # remove the terminating NULL
2092 $href =~ s/\x00$// ;
2093
2094 # convert the binary time into a user friendly string
2095 $date = localtime unpack("V", $binary_time);
2096 print "$date $href\n" ;
2097 }
2098
2099 # check for the existence of a specific key
2100 # remember to add the NULL
2101 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
2102 $date = localtime unpack("V", $binary_time) ;
2103 print "Last visited mox.perl.com on $date\n" ;
2104 }
2105 else {
2106 print "Never visited mox.perl.com\n"
2107 }
2108
2109 untie %hist_db ;
2110
68dc0745 2111=head2 The untie() Gotcha
778183f3 2112
7a2e2cd6 2113If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 2114recommended that you read L<perltie/The untie Gotcha>.
778183f3
PM
2115
2116Even if you don't currently make use of the API interface, it is still
2117worth reading it.
2118
2119Here is an example which illustrates the problem from a B<DB_File>
2120perspective:
2121
2122 use DB_File ;
2123 use Fcntl ;
2124
2125 my %x ;
2126 my $X ;
2127
2128 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
2129 or die "Cannot tie first time: $!" ;
2130
2131 $x{123} = 456 ;
2132
2133 untie %x ;
2134
2135 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2136 or die "Cannot tie second time: $!" ;
2137
2138 untie %x ;
2139
2140When run, the script will produce this error message:
2141
2142 Cannot tie second time: Invalid argument at bad.file line 14.
2143
2144Although the error message above refers to the second tie() statement
2145in the script, the source of the problem is really with the untie()
2146statement that precedes it.
2147
2148Having read L<perltie> you will probably have already guessed that the
2149error is caused by the extra copy of the tied object stored in C<$X>.
2150If you haven't, then the problem boils down to the fact that the
2151B<DB_File> destructor, DESTROY, will not be called until I<all>
2152references to the tied object are destroyed. Both the tied variable,
2153C<%x>, and C<$X> above hold a reference to the object. The call to
2154untie() will destroy the first, but C<$X> still holds a valid
2155reference, so the destructor will not get called and the database file
2156F<tst.fil> will remain open. The fact that Berkeley DB then reports the
b90e71be 2157attempt to open a database that is already open via the catch-all
778183f3
PM
2158"Invalid argument" doesn't help.
2159
2160If you run the script with the C<-w> flag the error message becomes:
2161
2162 untie attempted while 1 inner references still exist at bad.file line 12.
2163 Cannot tie second time: Invalid argument at bad.file line 14.
2164
2165which pinpoints the real problem. Finally the script can now be
2166modified to fix the original problem by destroying the API object
2167before the untie:
2168
2169 ...
2170 $x{123} = 456 ;
2171
2172 undef $X ;
2173 untie %x ;
2174
2175 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2176 ...
2177
f6b705ef
PP
2178
2179=head1 COMMON QUESTIONS
2180
2181=head2 Why is there Perl source in my database?
2182
2183If you look at the contents of a database file created by DB_File,
2184there can sometimes be part of a Perl script included in it.
2185
2186This happens because Berkeley DB uses dynamic memory to allocate
2187buffers which will subsequently be written to the database file. Being
2188dynamic, the memory could have been used for anything before DB
2189malloced it. As Berkeley DB doesn't clear the memory once it has been
2190allocated, the unused portions will contain random junk. In the case
2191where a Perl script gets written to the database, the random junk will
2192correspond to an area of dynamic memory that happened to be used during
2193the compilation of the script.
2194
2195Unless you don't like the possibility of there being part of your Perl
2196scripts embedded in a database file, this is nothing to worry about.
2197
2198=head2 How do I store complex data structures with DB_File?
2199
2200Although B<DB_File> cannot do this directly, there is a module which
2201can layer transparently over B<DB_File> to accomplish this feat.
2202
2203Check out the MLDBM module, available on CPAN in the directory
2204F<modules/by-module/MLDBM>.
2205
c7cd1ed9
JH
2206=head2 What does "wide character in subroutine entry" mean?
2207
2208You will usually get this message if you are working with UTF-8 data and
2209want to read/write it from/to a Berkeley DB database file.
2210
2211The easist way to deal with this issue is to use the pre-defined "utf8"
2212B<DBM_Filter> (see L<DBM_Filter>) that was designed to deal with this
2213situation.
2214
2215The example below shows what you need if I<both> the key and value are
2216expected to be in UTF-8.
2217
2218 use DB_File;
2219 use DBM_Filter;
2220
2221 my $db = tie %h, 'DB_File', '/tmp/try.db', O_CREAT|O_RDWR, 0666, $DB_BTREE;
2222 $db->Filter_Key_Push('utf8');
2223 $db->Filter_Value_Push('utf8');
2224
2225 my $key = "\N{LATIN SMALL LETTER A WITH ACUTE}";
2226 my $value = "\N{LATIN SMALL LETTER E WITH ACUTE}";
2227 $h{ $key } = $value;
2228
f6b705ef
PP
2229=head2 What does "Invalid Argument" mean?
2230
2231You will get this error message when one of the parameters in the
2232C<tie> call is wrong. Unfortunately there are quite a few parameters to
2233get wrong, so it can be difficult to figure out which one it is.
2234
2235Here are a couple of possibilities:
2236
2237=over 5
2238
2239=item 1.
2240
610ab055 2241Attempting to reopen a database without closing it.
f6b705ef
PP
2242
2243=item 2.
2244
2245Using the O_WRONLY flag.
2246
2247=back
2248
2249=head2 What does "Bareword 'DB_File' not allowed" mean?
2250
2251You will encounter this particular error message when you have the
2252C<strict 'subs'> pragma (or the full strict pragma) in your script.
2253Consider this script:
2254
3245f058 2255 use warnings ;
f6b705ef
PP
2256 use strict ;
2257 use DB_File ;
07200f1b 2258 my %x ;
f6b705ef
PP
2259 tie %x, DB_File, "filename" ;
2260
2261Running it produces the error in question:
2262
2263 Bareword "DB_File" not allowed while "strict subs" in use
2264
2265To get around the error, place the word C<DB_File> in either single or
2266double quotes, like this:
2267
2268 tie %x, "DB_File", "filename" ;
2269
2270Although it might seem like a real pain, it is really worth the effort
2271of having a C<use strict> in all your scripts.
2272
cad2e5aa
JH
2273=head1 REFERENCES
2274
2275Articles that are either about B<DB_File> or make use of it.
2276
2277=over 5
2278
2279=item 1.
2280
2281I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
2282Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
2283
2284=back
2285
cb1a09d0
AD
2286=head1 HISTORY
2287
1f70e1ea 2288Moved to the Changes file.
610ab055 2289
1f70e1ea 2290=head1 BUGS
05475680 2291
1f70e1ea
PM
2292Some older versions of Berkeley DB had problems with fixed length
2293records using the RECNO file format. This problem has been fixed since
2294version 1.85 of Berkeley DB.
e858de61 2295
1f70e1ea
PM
2296I am sure there are bugs in the code. If you do find any, or can
2297suggest any enhancements, I would welcome your comments.
a6ed719b 2298
1f70e1ea 2299=head1 AVAILABILITY
a6ed719b 2300
1f70e1ea
PM
2301B<DB_File> comes with the standard Perl source distribution. Look in
2302the directory F<ext/DB_File>. Given the amount of time between releases
2303of Perl the version that ships with Perl is quite likely to be out of
2304date, so the most recent version can always be found on CPAN (see
5bbd4290 2305L<perlmodlib/CPAN> for details), in the directory
1f70e1ea 2306F<modules/by-module/DB_File>.
a6ed719b 2307
039d031f
PM
2308This version of B<DB_File> will work with either version 1.x, 2.x or
23093.x of Berkeley DB, but is limited to the functionality provided by
2310version 1.
a6ed719b 2311
10261a36 2312The official web site for Berkeley DB is F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>.
039d031f 2313All versions of Berkeley DB are available there.
93af7a87 2314
1f70e1ea
PM
2315Alternatively, Berkeley DB version 1 is available at your nearest CPAN
2316archive in F<src/misc/db.1.85.tar.gz>.
e858de61 2317
1f70e1ea 2318=head1 COPYRIGHT
3b35bae3 2319
c7cd1ed9 2320Copyright (c) 1995-2016 Paul Marquess. All rights reserved. This program
a9fd575d
PM
2321is free software; you can redistribute it and/or modify it under the
2322same terms as Perl itself.
3b35bae3 2323
1f70e1ea
PM
2324Although B<DB_File> is covered by the Perl license, the library it
2325makes use of, namely Berkeley DB, is not. Berkeley DB has its own
2326copyright and its own license. Please take the time to read it.
3b35bae3 2327
10af739e 2328Here are a few words taken from the Berkeley DB FAQ (at
10261a36 2329F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>) regarding the license:
68dc0745 2330
a9fd575d 2331 Do I have to license DB to use it in Perl scripts?
3b35bae3 2332
a9fd575d
PM
2333 No. The Berkeley DB license requires that software that uses
2334 Berkeley DB be freely redistributable. In the case of Perl, that
2335 software is Perl, and not your scripts. Any Perl scripts that you
2336 write are your property, including scripts that make use of
2337 Berkeley DB. Neither the Perl license nor the Berkeley DB license
2338 place any restriction on what you may do with them.
88108326 2339
1f70e1ea
PM
2340If you are in any doubt about the license situation, contact either the
2341Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1
PM
2342
2343
3b35bae3
AD
2344=head1 SEE ALSO
2345
5bbd4290 2346L<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
c7cd1ed9 2347L<perldbmfilter>, L<DBM_Filter>
3b35bae3 2348
3b35bae3
AD
2349=head1 AUTHOR
2350
8e07c86e 2351The DB_File interface was written by Paul Marquess
5bbd4290 2352E<lt>pmqs@cpan.orgE<gt>.
3b35bae3
AD
2353
2354=cut