This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Test file had been renamed. remove old cruft
[perl5.git] / cpan / DB_File / DB_File.pm
CommitLineData
a0d0e21e
LW
1# DB_File.pm -- Perl 5 interface to Berkeley DB
2#
b6990ae0 3# Written by Paul Marquess (pmqs@cpan.org)
36477c24 4#
b6990ae0 5# Copyright (c) 1995-2013 Paul Marquess. All rights reserved.
36477c24 6# This program is free software; you can redistribute it and/or
7# modify it under the same terms as Perl itself.
8
8e07c86e
AD
9
10package DB_File::HASHINFO ;
785da04d 11
c1c1a1b2 12require 5.00504;
610ab055 13
3245f058 14use warnings;
785da04d 15use strict;
8e07c86e 16use Carp;
88108326 17require Tie::Hash;
18@DB_File::HASHINFO::ISA = qw(Tie::Hash);
8e07c86e 19
88108326 20sub new
8e07c86e 21{
88108326 22 my $pkg = shift ;
23 my %x ;
24 tie %x, $pkg ;
25 bless \%x, $pkg ;
8e07c86e
AD
26}
27
610ab055 28
88108326 29sub TIEHASH
30{
31 my $pkg = shift ;
32
efc79c7d
PM
33 bless { VALID => {
34 bsize => 1,
35 ffactor => 1,
36 nelem => 1,
37 cachesize => 1,
38 hash => 2,
39 lorder => 1,
36477c24 40 },
41 GOT => {}
42 }, $pkg ;
88108326 43}
8e07c86e 44
610ab055 45
8e07c86e
AD
46sub FETCH
47{
88108326 48 my $self = shift ;
49 my $key = shift ;
8e07c86e 50
36477c24 51 return $self->{GOT}{$key} if exists $self->{VALID}{$key} ;
88108326 52
53 my $pkg = ref $self ;
54 croak "${pkg}::FETCH - Unknown element '$key'" ;
8e07c86e
AD
55}
56
57
58sub STORE
59{
88108326 60 my $self = shift ;
61 my $key = shift ;
62 my $value = shift ;
63
efc79c7d
PM
64 my $type = $self->{VALID}{$key};
65
66 if ( $type )
8e07c86e 67 {
efc79c7d
PM
68 croak "Key '$key' not associated with a code reference"
69 if $type == 2 && !ref $value && ref $value ne 'CODE';
36477c24 70 $self->{GOT}{$key} = $value ;
8e07c86e
AD
71 return ;
72 }
73
88108326 74 my $pkg = ref $self ;
75 croak "${pkg}::STORE - Unknown element '$key'" ;
8e07c86e
AD
76}
77
78sub DELETE
79{
88108326 80 my $self = shift ;
81 my $key = shift ;
82
36477c24 83 if ( exists $self->{VALID}{$key} )
8e07c86e 84 {
36477c24 85 delete $self->{GOT}{$key} ;
8e07c86e
AD
86 return ;
87 }
88
88108326 89 my $pkg = ref $self ;
90 croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ;
8e07c86e
AD
91}
92
88108326 93sub EXISTS
8e07c86e 94{
88108326 95 my $self = shift ;
96 my $key = shift ;
8e07c86e 97
36477c24 98 exists $self->{VALID}{$key} ;
8e07c86e
AD
99}
100
88108326 101sub NotHere
8e07c86e 102{
18d2dc8c 103 my $self = shift ;
88108326 104 my $method = shift ;
8e07c86e 105
18d2dc8c 106 croak ref($self) . " does not define the method ${method}" ;
8e07c86e
AD
107}
108
18d2dc8c
PM
109sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") }
110sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") }
111sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") }
8e07c86e
AD
112
113package DB_File::RECNOINFO ;
785da04d 114
3245f058 115use warnings;
88108326 116use strict ;
117
045291aa 118@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e
AD
119
120sub TIEHASH
121{
88108326 122 my $pkg = shift ;
123
36477c24 124 bless { VALID => { map {$_, 1}
125 qw( bval cachesize psize flags lorder reclen bfname )
126 },
127 GOT => {},
128 }, $pkg ;
8e07c86e
AD
129}
130
88108326 131package DB_File::BTREEINFO ;
8e07c86e 132
3245f058 133use warnings;
88108326 134use strict ;
8e07c86e 135
88108326 136@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ;
8e07c86e 137
88108326 138sub TIEHASH
8e07c86e 139{
88108326 140 my $pkg = shift ;
141
efc79c7d
PM
142 bless { VALID => {
143 flags => 1,
144 cachesize => 1,
145 maxkeypage => 1,
146 minkeypage => 1,
147 psize => 1,
148 compare => 2,
149 prefix => 2,
150 lorder => 1,
36477c24 151 },
152 GOT => {},
153 }, $pkg ;
8e07c86e
AD
154}
155
156
8e07c86e 157package DB_File ;
785da04d 158
3245f058 159use warnings;
785da04d 160use strict;
07200f1b 161our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO);
ebf49c8b 162our ($db_version, $use_XSLoader, $splice_end_array_no_length, $splice_end_array, $Error);
8e07c86e
AD
163use Carp;
164
785da04d 165
16f3356e 166$VERSION = "1.831" ;
083e9212 167$VERSION = eval $VERSION; # needed for dev releases
d85a743d
PM
168
169{
82c92bb0 170 local $SIG{__WARN__} = sub {$splice_end_array_no_length = join(" ",@_);};
d85a743d 171 my @a =(1); splice(@a, 3);
ebf49c8b
CBW
172 $splice_end_array_no_length =
173 ($splice_end_array_no_length =~ /^splice\(\) offset past end of array at /);
174}
175{
82c92bb0 176 local $SIG{__WARN__} = sub {$splice_end_array = join(" ", @_);};
ebf49c8b 177 my @a =(1); splice(@a, 3, 1);
d85a743d
PM
178 $splice_end_array =
179 ($splice_end_array =~ /^splice\(\) offset past end of array at /);
180}
8e07c86e
AD
181
182#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
88108326 183$DB_BTREE = new DB_File::BTREEINFO ;
184$DB_HASH = new DB_File::HASHINFO ;
185$DB_RECNO = new DB_File::RECNOINFO ;
8e07c86e 186
785da04d 187require Tie::Hash;
8e07c86e 188require Exporter;
b90e71be
GS
189BEGIN {
190 $use_XSLoader = 1 ;
e5021521 191 { local $SIG{__DIE__} ; eval { require XSLoader } ; }
b90e71be
GS
192
193 if ($@) {
194 $use_XSLoader = 0 ;
195 require DynaLoader;
196 @ISA = qw(DynaLoader);
197 }
198}
199
200push @ISA, qw(Tie::Hash Exporter);
8e07c86e
AD
201@EXPORT = qw(
202 $DB_BTREE $DB_HASH $DB_RECNO
88108326 203
8e07c86e
AD
204 BTREEMAGIC
205 BTREEVERSION
206 DB_LOCK
207 DB_SHMEM
208 DB_TXN
209 HASHMAGIC
210 HASHVERSION
211 MAX_PAGE_NUMBER
212 MAX_PAGE_OFFSET
213 MAX_REC_NUMBER
214 RET_ERROR
215 RET_SPECIAL
216 RET_SUCCESS
217 R_CURSOR
218 R_DUP
219 R_FIRST
220 R_FIXEDLEN
221 R_IAFTER
222 R_IBEFORE
223 R_LAST
224 R_NEXT
225 R_NOKEY
226 R_NOOVERWRITE
227 R_PREV
228 R_RECNOSYNC
229 R_SETCURSOR
230 R_SNAPSHOT
231 __R_UNUSED
88108326 232
045291aa 233);
8e07c86e
AD
234
235sub AUTOLOAD {
785da04d 236 my($constname);
8e07c86e 237 ($constname = $AUTOLOAD) =~ s/.*:://;
07200f1b
PM
238 my ($error, $val) = constant($constname);
239 Carp::croak $error if $error;
57c77851
JS
240 no strict 'refs';
241 *{$AUTOLOAD} = sub { $val };
242 goto &{$AUTOLOAD};
07200f1b 243}
8e07c86e 244
f6b705ef 245
a6ed719b 246eval {
1f70e1ea
PM
247 # Make all Fcntl O_XXX constants available for importing
248 require Fcntl;
249 my @O = grep /^O_/, @Fcntl::EXPORT;
250 Fcntl->import(@O); # first we import what we want to export
251 push(@EXPORT, @O);
a6ed719b 252};
f6b705ef 253
b90e71be
GS
254if ($use_XSLoader)
255 { XSLoader::load("DB_File", $VERSION)}
256else
257 { bootstrap DB_File $VERSION }
8e07c86e 258
05475680 259sub tie_hash_or_array
610ab055
PM
260{
261 my (@arg) = @_ ;
05475680 262 my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ;
610ab055 263
f1aa208b
RGS
264 use File::Spec;
265 $arg[1] = File::Spec->rel2abs($arg[1])
266 if defined $arg[1] ;
267
610ab055
PM
268 $arg[4] = tied %{ $arg[4] }
269 if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ;
270
efc79c7d
PM
271 $arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2];
272 $arg[3] = 0666 if @arg >=4 && ! defined $arg[3];
273
9c095db2
PM
274 # make recno in Berkeley DB version 2 (or better) work like
275 # recno in version 1.
d6067fe3
SP
276 if ($db_version >= 4 and ! $tieHASH) {
277 $arg[2] |= O_CREAT();
278 }
279
1f70e1ea
PM
280 if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and
281 $arg[1] and ! -e $arg[1]) {
282 open(FH, ">$arg[1]") or return undef ;
283 close FH ;
284 chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ;
285 }
286
05475680 287 DoTie_($tieHASH, @arg) ;
610ab055
PM
288}
289
05475680
PM
290sub TIEHASH
291{
292 tie_hash_or_array(@_) ;
293}
294
295sub TIEARRAY
296{
297 tie_hash_or_array(@_) ;
298}
88108326 299
045291aa
PM
300sub CLEAR
301{
1f70e1ea 302 my $self = shift;
3245f058 303 my $key = 0 ;
1f70e1ea
PM
304 my $value = "" ;
305 my $status = $self->seq($key, $value, R_FIRST());
306 my @keys;
307
308 while ($status == 0) {
309 push @keys, $key;
310 $status = $self->seq($key, $value, R_NEXT());
311 }
312 foreach $key (reverse @keys) {
313 my $s = $self->del($key);
314 }
315}
316
045291aa
PM
317sub EXTEND { }
318
319sub STORESIZE
320{
321 my $self = shift;
322 my $length = shift ;
323 my $current_length = $self->length() ;
324
325 if ($length < $current_length) {
326 my $key ;
327 for ($key = $current_length - 1 ; $key >= $length ; -- $key)
328 { $self->del($key) }
329 }
a9fd575d
PM
330 elsif ($length > $current_length) {
331 $self->put($length-1, "") ;
332 }
045291aa
PM
333}
334
c5da4faf
PM
335
336sub SPLICE
337{
338 my $self = shift;
339 my $offset = shift;
340 if (not defined $offset) {
d85a743d 341 warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
c5da4faf
PM
342 $offset = 0;
343 }
344
ebf49c8b 345 my $has_length = @_;
c5da4faf
PM
346 my $length = @_ ? shift : 0;
347 # Carping about definedness comes _after_ the OFFSET sanity check.
348 # This is so we get the same error messages as Perl's splice().
349 #
350
351 my @list = @_;
352
353 my $size = $self->FETCHSIZE();
354
355 # 'If OFFSET is negative then it start that far from the end of
356 # the array.'
357 #
358 if ($offset < 0) {
359 my $new_offset = $size + $offset;
360 if ($new_offset < 0) {
361 die "Modification of non-creatable array value attempted, "
362 . "subscript $offset";
363 }
364 $offset = $new_offset;
365 }
366
c5da4faf 367 if (not defined $length) {
d85a743d 368 warnings::warnif('uninitialized', 'Use of uninitialized value in splice');
c5da4faf
PM
369 $length = 0;
370 }
371
d85a743d
PM
372 if ($offset > $size) {
373 $offset = $size;
374 warnings::warnif('misc', 'splice() offset past end of array')
ebf49c8b 375 if $has_length ? $splice_end_array : $splice_end_array_no_length;
d85a743d
PM
376 }
377
c5da4faf
PM
378 # 'If LENGTH is omitted, removes everything from OFFSET onward.'
379 if (not defined $length) {
380 $length = $size - $offset;
381 }
382
383 # 'If LENGTH is negative, leave that many elements off the end of
384 # the array.'
385 #
386 if ($length < 0) {
387 $length = $size - $offset + $length;
388
389 if ($length < 0) {
390 # The user must have specified a length bigger than the
391 # length of the array passed in. But perl's splice()
392 # doesn't catch this, it just behaves as for length=0.
393 #
394 $length = 0;
395 }
396 }
397
398 if ($length > $size - $offset) {
399 $length = $size - $offset;
400 }
401
402 # $num_elems holds the current number of elements in the database.
403 my $num_elems = $size;
404
405 # 'Removes the elements designated by OFFSET and LENGTH from an
406 # array,'...
407 #
408 my @removed = ();
409 foreach (0 .. $length - 1) {
410 my $old;
411 my $status = $self->get($offset, $old);
412 if ($status != 0) {
413 my $msg = "error from Berkeley DB on get($offset, \$old)";
414 if ($status == 1) {
415 $msg .= ' (no such element?)';
416 }
417 else {
418 $msg .= ": error status $status";
419 if (defined $! and $! ne '') {
420 $msg .= ", message $!";
421 }
422 }
423 die $msg;
424 }
425 push @removed, $old;
426
427 $status = $self->del($offset);
428 if ($status != 0) {
429 my $msg = "error from Berkeley DB on del($offset)";
430 if ($status == 1) {
431 $msg .= ' (no such element?)';
432 }
433 else {
434 $msg .= ": error status $status";
435 if (defined $! and $! ne '') {
436 $msg .= ", message $!";
437 }
438 }
439 die $msg;
440 }
441
442 -- $num_elems;
443 }
444
445 # ...'and replaces them with the elements of LIST, if any.'
446 my $pos = $offset;
447 while (defined (my $elem = shift @list)) {
448 my $old_pos = $pos;
449 my $status;
450 if ($pos >= $num_elems) {
451 $status = $self->put($pos, $elem);
452 }
453 else {
454 $status = $self->put($pos, $elem, $self->R_IBEFORE);
455 }
456
457 if ($status != 0) {
458 my $msg = "error from Berkeley DB on put($pos, $elem, ...)";
459 if ($status == 1) {
460 $msg .= ' (no such element?)';
461 }
462 else {
463 $msg .= ", error status $status";
464 if (defined $! and $! ne '') {
465 $msg .= ", message $!";
466 }
467 }
468 die $msg;
469 }
470
471 die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE"
472 if $old_pos != $pos;
473
474 ++ $pos;
475 ++ $num_elems;
476 }
477
478 if (wantarray) {
479 # 'In list context, returns the elements removed from the
480 # array.'
481 #
482 return @removed;
483 }
484 elsif (defined wantarray and not wantarray) {
485 # 'In scalar context, returns the last element removed, or
486 # undef if no elements are removed.'
487 #
488 if (@removed) {
489 my $last = pop @removed;
490 return "$last";
491 }
492 else {
493 return undef;
494 }
495 }
496 elsif (not defined wantarray) {
497 # Void context
498 }
499 else { die }
500}
501sub ::DB_File::splice { &SPLICE }
502
6ca2e664
PM
503sub find_dup
504{
505 croak "Usage: \$db->find_dup(key,value)\n"
506 unless @_ == 3 ;
507
508 my $db = shift ;
509 my ($origkey, $value_wanted) = @_ ;
510 my ($key, $value) = ($origkey, 0);
511 my ($status) = 0 ;
512
513 for ($status = $db->seq($key, $value, R_CURSOR() ) ;
514 $status == 0 ;
515 $status = $db->seq($key, $value, R_NEXT() ) ) {
516
517 return 0 if $key eq $origkey and $value eq $value_wanted ;
518 }
519
520 return $status ;
521}
522
523sub del_dup
524{
525 croak "Usage: \$db->del_dup(key,value)\n"
526 unless @_ == 3 ;
527
528 my $db = shift ;
529 my ($key, $value) = @_ ;
530 my ($status) = $db->find_dup($key, $value) ;
531 return $status if $status != 0 ;
532
533 $status = $db->del($key, R_CURSOR() ) ;
534 return $status ;
535}
536
88108326 537sub get_dup
538{
539 croak "Usage: \$db->get_dup(key [,flag])\n"
540 unless @_ == 2 or @_ == 3 ;
541
542 my $db = shift ;
543 my $key = shift ;
544 my $flag = shift ;
f6b705ef 545 my $value = 0 ;
88108326 546 my $origkey = $key ;
547 my $wantarray = wantarray ;
f6b705ef 548 my %values = () ;
88108326 549 my @values = () ;
550 my $counter = 0 ;
f6b705ef 551 my $status = 0 ;
88108326 552
f6b705ef 553 # iterate through the database until either EOF ($status == 0)
554 # or a different key is encountered ($key ne $origkey).
555 for ($status = $db->seq($key, $value, R_CURSOR()) ;
556 $status == 0 and $key eq $origkey ;
557 $status = $db->seq($key, $value, R_NEXT()) ) {
88108326 558
f6b705ef 559 # save the value or count number of matches
560 if ($wantarray) {
561 if ($flag)
562 { ++ $values{$value} }
563 else
564 { push (@values, $value) }
565 }
566 else
567 { ++ $counter }
88108326 568
88108326 569 }
570
f6b705ef 571 return ($wantarray ? ($flag ? %values : @values) : $counter) ;
88108326 572}
573
574
a30cae0b
CBW
575sub STORABLE_freeze
576{
577 my $type = ref shift;
578 croak "Cannot freeze $type object\n";
579}
580
581sub STORABLE_thaw
582{
583 my $type = ref shift;
584 croak "Cannot thaw $type object\n";
585}
586
587
588
8e07c86e
AD
5891;
590__END__
591
3b35bae3
AD
592=head1 NAME
593
1f70e1ea 594DB_File - Perl5 access to Berkeley DB version 1.x
3b35bae3
AD
595
596=head1 SYNOPSIS
597
bbc7dcd2
MS
598 use DB_File;
599
88108326 600 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ;
601 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ;
602 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ;
760ac839 603
3b35bae3
AD
604 $status = $X->del($key [, $flags]) ;
605 $status = $X->put($key, $value [, $flags]) ;
606 $status = $X->get($key, $value [, $flags]) ;
760ac839 607 $status = $X->seq($key, $value, $flags) ;
3b35bae3
AD
608 $status = $X->sync([$flags]) ;
609 $status = $X->fd ;
760ac839 610
f6b705ef 611 # BTREE only
88108326 612 $count = $X->get_dup($key) ;
613 @list = $X->get_dup($key) ;
614 %list = $X->get_dup($key, 1) ;
6ca2e664
PM
615 $status = $X->find_dup($key, $value) ;
616 $status = $X->del_dup($key, $value) ;
88108326 617
f6b705ef 618 # RECNO only
619 $a = $X->length;
620 $a = $X->pop ;
621 $X->push(list);
622 $a = $X->shift;
623 $X->unshift(list);
c5da4faf 624 @r = $X->splice(offset, length, elements);
f6b705ef 625
cad2e5aa
JH
626 # DBM Filters
627 $old_filter = $db->filter_store_key ( sub { ... } ) ;
628 $old_filter = $db->filter_store_value( sub { ... } ) ;
629 $old_filter = $db->filter_fetch_key ( sub { ... } ) ;
630 $old_filter = $db->filter_fetch_value( sub { ... } ) ;
631
3b35bae3
AD
632 untie %hash ;
633 untie @array ;
634
635=head1 DESCRIPTION
636
8e07c86e 637B<DB_File> is a module which allows Perl programs to make use of the
1f70e1ea 638facilities provided by Berkeley DB version 1.x (if you have a newer
0d735f06 639version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>).
039d031f
PM
640It is assumed that you have a copy of the Berkeley DB manual pages at
641hand when reading this documentation. The interface defined here
642mirrors the Berkeley DB interface closely.
68dc0745 643
8e07c86e
AD
644Berkeley DB is a C library which provides a consistent interface to a
645number of database formats. B<DB_File> provides an interface to all
646three of the database types currently supported by Berkeley DB.
3b35bae3
AD
647
648The file types are:
649
650=over 5
651
88108326 652=item B<DB_HASH>
3b35bae3 653
88108326 654This database type allows arbitrary key/value pairs to be stored in data
8e07c86e
AD
655files. This is equivalent to the functionality provided by other
656hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though,
657the files created using DB_HASH are not compatible with any of the
658other packages mentioned.
3b35bae3 659
8e07c86e
AD
660A default hashing algorithm, which will be adequate for most
661applications, is built into Berkeley DB. If you do need to use your own
662hashing algorithm it is possible to write your own in Perl and have
663B<DB_File> use it instead.
3b35bae3 664
88108326 665=item B<DB_BTREE>
666
667The btree format allows arbitrary key/value pairs to be stored in a
8e07c86e 668sorted, balanced binary tree.
3b35bae3 669
8e07c86e
AD
670As with the DB_HASH format, it is possible to provide a user defined
671Perl routine to perform the comparison of keys. By default, though, the
672keys are stored in lexical order.
3b35bae3 673
88108326 674=item B<DB_RECNO>
3b35bae3 675
8e07c86e
AD
676DB_RECNO allows both fixed-length and variable-length flat text files
677to be manipulated using the same key/value pair interface as in DB_HASH
678and DB_BTREE. In this case the key will consist of a record (line)
679number.
3b35bae3
AD
680
681=back
682
e5021521 683=head2 Using DB_File with Berkeley DB version 2 or greater
1f70e1ea
PM
684
685Although B<DB_File> is intended to be used with Berkeley DB version 1,
e5021521 686it can also be used with version 2, 3 or 4. In this case the interface is
1f70e1ea 687limited to the functionality provided by Berkeley DB 1.x. Anywhere the
e5021521 688version 2 or greater interface differs, B<DB_File> arranges for it to work
039d031f 689like version 1. This feature allows B<DB_File> scripts that were built
e5021521 690with version 1 to be migrated to version 2 or greater without any changes.
1f70e1ea
PM
691
692If you want to make use of the new features available in Berkeley DB
b90e71be 6932.x or greater, use the Perl module B<BerkeleyDB> instead.
1f70e1ea 694
e5021521
JH
695B<Note:> The database file format has changed multiple times in Berkeley
696DB version 2, 3 and 4. If you cannot recreate your databases, you
697must dump any existing databases with either the C<db_dump> or the
698C<db_dump185> utility that comes with Berkeley DB.
699Once you have rebuilt DB_File to use Berkeley DB version 2 or greater,
700your databases can be recreated using C<db_load>. Refer to the Berkeley DB
1f70e1ea
PM
701documentation for further details.
702
e5021521 703Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley
039d031f 704DB with DB_File.
1f70e1ea 705
68dc0745 706=head2 Interface to Berkeley DB
3b35bae3
AD
707
708B<DB_File> allows access to Berkeley DB files using the tie() mechanism
8e07c86e
AD
709in Perl 5 (for full details, see L<perlfunc/tie()>). This facility
710allows B<DB_File> to access Berkeley DB files using either an
711associative array (for DB_HASH & DB_BTREE file types) or an ordinary
712array (for the DB_RECNO file type).
3b35bae3 713
88108326 714In addition to the tie() interface, it is also possible to access most
715of the functions provided in the Berkeley DB API directly.
f6b705ef 716See L<THE API INTERFACE>.
3b35bae3 717
88108326 718=head2 Opening a Berkeley DB Database File
3b35bae3 719
8e07c86e 720Berkeley DB uses the function dbopen() to open or create a database.
f6b705ef 721Here is the C prototype for dbopen():
3b35bae3
AD
722
723 DB*
724 dbopen (const char * file, int flags, int mode,
725 DBTYPE type, const void * openinfo)
726
727The parameter C<type> is an enumeration which specifies which of the 3
728interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used.
729Depending on which of these is actually chosen, the final parameter,
730I<openinfo> points to a data structure which allows tailoring of the
731specific interface method.
732
8e07c86e 733This interface is handled slightly differently in B<DB_File>. Here is
88108326 734an equivalent call using B<DB_File>:
3b35bae3 735
88108326 736 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ;
3b35bae3 737
8e07c86e
AD
738The C<filename>, C<flags> and C<mode> parameters are the direct
739equivalent of their dbopen() counterparts. The final parameter $DB_HASH
740performs the function of both the C<type> and C<openinfo> parameters in
741dbopen().
3b35bae3 742
88108326 743In the example above $DB_HASH is actually a pre-defined reference to a
744hash object. B<DB_File> has three of these pre-defined references.
745Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO.
3b35bae3 746
8e07c86e
AD
747The keys allowed in each of these pre-defined references is limited to
748the names used in the equivalent C structure. So, for example, the
749$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>,
88108326 750C<ffactor>, C<hash>, C<lorder> and C<nelem>.
751
752To change one of these elements, just assign to it like this:
753
754 $DB_HASH->{'cachesize'} = 10000 ;
755
756The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are
757usually adequate for most applications. If you do need to create extra
758instances of these objects, constructors are available for each file
759type.
760
761Here are examples of the constructors and the valid options available
762for DB_HASH, DB_BTREE and DB_RECNO respectively.
763
764 $a = new DB_File::HASHINFO ;
765 $a->{'bsize'} ;
766 $a->{'cachesize'} ;
767 $a->{'ffactor'};
768 $a->{'hash'} ;
769 $a->{'lorder'} ;
770 $a->{'nelem'} ;
771
772 $b = new DB_File::BTREEINFO ;
773 $b->{'flags'} ;
774 $b->{'cachesize'} ;
775 $b->{'maxkeypage'} ;
776 $b->{'minkeypage'} ;
777 $b->{'psize'} ;
778 $b->{'compare'} ;
779 $b->{'prefix'} ;
780 $b->{'lorder'} ;
781
782 $c = new DB_File::RECNOINFO ;
783 $c->{'bval'} ;
784 $c->{'cachesize'} ;
785 $c->{'psize'} ;
786 $c->{'flags'} ;
787 $c->{'lorder'} ;
788 $c->{'reclen'} ;
789 $c->{'bfname'} ;
790
791The values stored in the hashes above are mostly the direct equivalent
792of their C counterpart. Like their C counterparts, all are set to a
f6b705ef 793default values - that means you don't have to set I<all> of the
88108326 794values when you only want to change one. Here is an example:
795
796 $a = new DB_File::HASHINFO ;
797 $a->{'cachesize'} = 12345 ;
798 tie %y, 'DB_File', "filename", $flags, 0777, $a ;
799
36477c24 800A few of the options need extra discussion here. When used, the C
88108326 801equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers
802to C functions. In B<DB_File> these keys are used to store references
803to Perl subs. Below are templates for each of the subs:
804
805 sub hash
806 {
807 my ($data) = @_ ;
808 ...
809 # return the hash value for $data
810 return $hash ;
811 }
3b35bae3 812
88108326 813 sub compare
814 {
815 my ($key, $key2) = @_ ;
816 ...
817 # return 0 if $key1 eq $key2
818 # -1 if $key1 lt $key2
819 # 1 if $key1 gt $key2
820 return (-1 , 0 or 1) ;
821 }
3b35bae3 822
88108326 823 sub prefix
824 {
825 my ($key, $key2) = @_ ;
826 ...
827 # return number of bytes of $key2 which are
828 # necessary to determine that it is greater than $key1
829 return $bytes ;
830 }
3b35bae3 831
f6b705ef 832See L<Changing the BTREE sort order> for an example of using the
833C<compare> template.
88108326 834
36477c24 835If you are using the DB_RECNO interface and you intend making use of
9a2c4ce3 836C<bval>, you should check out L<The 'bval' Option>.
36477c24 837
88108326 838=head2 Default Parameters
839
840It is possible to omit some or all of the final 4 parameters in the
841call to C<tie> and let them take default values. As DB_HASH is the most
842common file format used, the call:
843
844 tie %A, "DB_File", "filename" ;
845
846is equivalent to:
847
18d2dc8c 848 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 849
850It is also possible to omit the filename parameter as well, so the
851call:
852
853 tie %A, "DB_File" ;
854
855is equivalent to:
856
18d2dc8c 857 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ;
88108326 858
f6b705ef 859See L<In Memory Databases> for a discussion on the use of C<undef>
88108326 860in place of a filename.
861
f6b705ef 862=head2 In Memory Databases
863
864Berkeley DB allows the creation of in-memory databases by using NULL
865(that is, a C<(char *)0> in C) in place of the filename. B<DB_File>
866uses C<undef> instead of NULL to provide this functionality.
867
868=head1 DB_HASH
869
870The DB_HASH file format is probably the most commonly used of the three
871file formats that B<DB_File> supports. It is also very straightforward
872to use.
873
68dc0745 874=head2 A Simple Example
f6b705ef 875
876This example shows how to create a database, add key/value pairs to the
877database, delete keys/value pairs and finally how to enumerate the
878contents of the database.
879
3245f058 880 use warnings ;
610ab055 881 use strict ;
f6b705ef 882 use DB_File ;
07200f1b 883 our (%h, $k, $v) ;
f6b705ef 884
2c2d71f5 885 unlink "fruit" ;
45a340cb 886 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH
f6b705ef 887 or die "Cannot open file 'fruit': $!\n";
888
889 # Add a few key/value pairs to the file
890 $h{"apple"} = "red" ;
891 $h{"orange"} = "orange" ;
892 $h{"banana"} = "yellow" ;
893 $h{"tomato"} = "red" ;
894
895 # Check for existence of a key
896 print "Banana Exists\n\n" if $h{"banana"} ;
897
898 # Delete a key/value pair.
899 delete $h{"apple"} ;
900
901 # print the contents of the file
902 while (($k, $v) = each %h)
903 { print "$k -> $v\n" }
904
905 untie %h ;
906
907here is the output:
908
909 Banana Exists
bbc7dcd2 910
f6b705ef 911 orange -> orange
912 tomato -> red
913 banana -> yellow
914
915Note that the like ordinary associative arrays, the order of the keys
916retrieved is in an apparently random order.
917
918=head1 DB_BTREE
919
920The DB_BTREE format is useful when you want to store data in a given
921order. By default the keys will be stored in lexical order, but as you
922will see from the example shown in the next section, it is very easy to
923define your own sorting function.
924
925=head2 Changing the BTREE sort order
926
927This script shows how to override the default sorting algorithm that
928BTREE uses. Instead of using the normal lexical ordering, a case
929insensitive compare function will be used.
88108326 930
3245f058 931 use warnings ;
610ab055 932 use strict ;
f6b705ef 933 use DB_File ;
610ab055
PM
934
935 my %h ;
f6b705ef 936
937 sub Compare
938 {
939 my ($key1, $key2) = @_ ;
940 "\L$key1" cmp "\L$key2" ;
941 }
942
943 # specify the Perl sub that will do the comparison
944 $DB_BTREE->{'compare'} = \&Compare ;
945
2c2d71f5 946 unlink "tree" ;
45a340cb 947 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 948 or die "Cannot open file 'tree': $!\n" ;
949
950 # Add a key/value pair to the file
951 $h{'Wall'} = 'Larry' ;
952 $h{'Smith'} = 'John' ;
953 $h{'mouse'} = 'mickey' ;
954 $h{'duck'} = 'donald' ;
955
956 # Delete
957 delete $h{"duck"} ;
958
959 # Cycle through the keys printing them in order.
960 # Note it is not necessary to sort the keys as
961 # the btree will have kept them in order automatically.
962 foreach (keys %h)
963 { print "$_\n" }
964
965 untie %h ;
966
967Here is the output from the code above.
968
969 mouse
970 Smith
971 Wall
972
973There are a few point to bear in mind if you want to change the
974ordering in a BTREE database:
975
976=over 5
977
978=item 1.
979
980The new compare function must be specified when you create the database.
981
982=item 2.
983
984You cannot change the ordering once the database has been created. Thus
985you must use the same compare function every time you access the
88108326 986database.
987
39793c41
PM
988=item 3
989
990Duplicate keys are entirely defined by the comparison function.
991In the case-insensitive example above, the keys: 'KEY' and 'key'
992would be considered duplicates, and assigning to the second one
52ffee89 993would overwrite the first. If duplicates are allowed for (with the
59e51af5 994R_DUP flag discussed below), only a single copy of duplicate keys
39793c41
PM
995is stored in the database --- so (again with example above) assigning
996three values to the keys: 'KEY', 'Key', and 'key' would leave just
997the first key: 'KEY' in the database with three values. For some
998situations this results in information loss, so care should be taken
999to provide fully qualified comparison functions when necessary.
1000For example, the above comparison routine could be modified to
1001additionally compare case-sensitively if two keys are equal in the
1002case insensitive comparison:
1003
1004 sub compare {
1005 my($key1, $key2) = @_;
1006 lc $key1 cmp lc $key2 ||
1007 $key1 cmp $key2;
1008 }
1009
1010And now you will only have duplicates when the keys themselves
1011are truly the same. (note: in versions of the db library prior to
1012about November 1996, such duplicate keys were retained so it was
1013possible to recover the original keys in sets of keys that
1014compared as equal).
1015
1016
f6b705ef 1017=back
1018
68dc0745 1019=head2 Handling Duplicate Keys
f6b705ef 1020
1021The BTREE file type optionally allows a single key to be associated
1022with an arbitrary number of values. This option is enabled by setting
1023the flags element of C<$DB_BTREE> to R_DUP when creating the database.
1024
88108326 1025There are some difficulties in using the tied hash interface if you
1026want to manipulate a BTREE database with duplicate keys. Consider this
1027code:
1028
3245f058 1029 use warnings ;
610ab055 1030 use strict ;
88108326 1031 use DB_File ;
610ab055 1032
962cee9f 1033 my ($filename, %h) ;
610ab055 1034
88108326 1035 $filename = "tree" ;
1036 unlink $filename ;
bbc7dcd2 1037
88108326 1038 # Enable duplicate records
1039 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1040
45a340cb 1041 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1042 or die "Cannot open $filename: $!\n";
bbc7dcd2 1043
88108326 1044 # Add some key/value pairs to the file
1045 $h{'Wall'} = 'Larry' ;
1046 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1047 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1048 $h{'Smith'} = 'John' ;
1049 $h{'mouse'} = 'mickey' ;
1050
1051 # iterate through the associative array
1052 # and print each key/value pair.
2c2d71f5 1053 foreach (sort keys %h)
88108326 1054 { print "$_ -> $h{$_}\n" }
1055
f6b705ef 1056 untie %h ;
1057
88108326 1058Here is the output:
1059
1060 Smith -> John
1061 Wall -> Larry
1062 Wall -> Larry
f6b705ef 1063 Wall -> Larry
88108326 1064 mouse -> mickey
1065
f6b705ef 1066As you can see 3 records have been successfully created with key C<Wall>
88108326 1067- the only thing is, when they are retrieved from the database they
f6b705ef 1068I<seem> to have the same value, namely C<Larry>. The problem is caused
1069by the way that the associative array interface works. Basically, when
1070the associative array interface is used to fetch the value associated
1071with a given key, it will only ever retrieve the first value.
88108326 1072
1073Although it may not be immediately obvious from the code above, the
1074associative array interface can be used to write values with duplicate
1075keys, but it cannot be used to read them back from the database.
1076
1077The way to get around this problem is to use the Berkeley DB API method
1078called C<seq>. This method allows sequential access to key/value
f6b705ef 1079pairs. See L<THE API INTERFACE> for details of both the C<seq> method
1080and the API in general.
88108326 1081
1082Here is the script above rewritten using the C<seq> API method.
1083
3245f058 1084 use warnings ;
610ab055 1085 use strict ;
88108326 1086 use DB_File ;
bbc7dcd2 1087
962cee9f 1088 my ($filename, $x, %h, $status, $key, $value) ;
610ab055 1089
88108326 1090 $filename = "tree" ;
1091 unlink $filename ;
bbc7dcd2 1092
88108326 1093 # Enable duplicate records
1094 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1095
45a340cb 1096 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
88108326 1097 or die "Cannot open $filename: $!\n";
bbc7dcd2 1098
88108326 1099 # Add some key/value pairs to the file
1100 $h{'Wall'} = 'Larry' ;
1101 $h{'Wall'} = 'Brick' ; # Note the duplicate key
f6b705ef 1102 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value
88108326 1103 $h{'Smith'} = 'John' ;
1104 $h{'mouse'} = 'mickey' ;
bbc7dcd2 1105
f6b705ef 1106 # iterate through the btree using seq
88108326 1107 # and print each key/value pair.
610ab055 1108 $key = $value = 0 ;
f6b705ef 1109 for ($status = $x->seq($key, $value, R_FIRST) ;
1110 $status == 0 ;
1111 $status = $x->seq($key, $value, R_NEXT) )
88108326 1112 { print "$key -> $value\n" }
bbc7dcd2 1113
88108326 1114 undef $x ;
1115 untie %h ;
1116
1117that prints:
1118
1119 Smith -> John
1120 Wall -> Brick
f6b705ef 1121 Wall -> Brick
88108326 1122 Wall -> Larry
1123 mouse -> mickey
1124
f6b705ef 1125This time we have got all the key/value pairs, including the multiple
88108326 1126values associated with the key C<Wall>.
1127
6ca2e664
PM
1128To make life easier when dealing with duplicate keys, B<DB_File> comes with
1129a few utility methods.
1130
68dc0745 1131=head2 The get_dup() Method
f6b705ef 1132
6ca2e664 1133The C<get_dup> method assists in
88108326 1134reading duplicate values from BTREE databases. The method can take the
1135following forms:
1136
1137 $count = $x->get_dup($key) ;
1138 @list = $x->get_dup($key) ;
1139 %list = $x->get_dup($key, 1) ;
1140
1141In a scalar context the method returns the number of values associated
1142with the key, C<$key>.
1143
1144In list context, it returns all the values which match C<$key>. Note
f6b705ef 1145that the values will be returned in an apparently random order.
88108326 1146
7a2e2cd6 1147In list context, if the second parameter is present and evaluates
1148TRUE, the method returns an associative array. The keys of the
1149associative array correspond to the values that matched in the BTREE
1150and the values of the array are a count of the number of times that
1151particular value occurred in the BTREE.
88108326 1152
f6b705ef 1153So assuming the database created above, we can use C<get_dup> like
88108326 1154this:
1155
3245f058 1156 use warnings ;
2c2d71f5
JH
1157 use strict ;
1158 use DB_File ;
bbc7dcd2 1159
962cee9f 1160 my ($filename, $x, %h) ;
2c2d71f5
JH
1161
1162 $filename = "tree" ;
bbc7dcd2 1163
2c2d71f5
JH
1164 # Enable duplicate records
1165 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1166
45a340cb 1167 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
2c2d71f5
JH
1168 or die "Cannot open $filename: $!\n";
1169
610ab055 1170 my $cnt = $x->get_dup("Wall") ;
88108326 1171 print "Wall occurred $cnt times\n" ;
1172
610ab055 1173 my %hash = $x->get_dup("Wall", 1) ;
88108326 1174 print "Larry is there\n" if $hash{'Larry'} ;
f6b705ef 1175 print "There are $hash{'Brick'} Brick Walls\n" ;
88108326 1176
2c2d71f5 1177 my @list = sort $x->get_dup("Wall") ;
88108326 1178 print "Wall => [@list]\n" ;
1179
f6b705ef 1180 @list = $x->get_dup("Smith") ;
88108326 1181 print "Smith => [@list]\n" ;
bbc7dcd2 1182
f6b705ef 1183 @list = $x->get_dup("Dog") ;
88108326 1184 print "Dog => [@list]\n" ;
1185
1186
1187and it will print:
1188
f6b705ef 1189 Wall occurred 3 times
88108326 1190 Larry is there
f6b705ef 1191 There are 2 Brick Walls
1192 Wall => [Brick Brick Larry]
88108326 1193 Smith => [John]
1194 Dog => []
3b35bae3 1195
6ca2e664
PM
1196=head2 The find_dup() Method
1197
1198 $status = $X->find_dup($key, $value) ;
1199
b90e71be 1200This method checks for the existence of a specific key/value pair. If the
6ca2e664
PM
1201pair exists, the cursor is left pointing to the pair and the method
1202returns 0. Otherwise the method returns a non-zero value.
1203
1204Assuming the database from the previous example:
1205
3245f058 1206 use warnings ;
6ca2e664
PM
1207 use strict ;
1208 use DB_File ;
bbc7dcd2 1209
962cee9f 1210 my ($filename, $x, %h, $found) ;
6ca2e664 1211
07200f1b 1212 $filename = "tree" ;
bbc7dcd2 1213
6ca2e664
PM
1214 # Enable duplicate records
1215 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1216
45a340cb 1217 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664
PM
1218 or die "Cannot open $filename: $!\n";
1219
1220 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1221 print "Larry Wall is $found there\n" ;
bbc7dcd2 1222
6ca2e664
PM
1223 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ;
1224 print "Harry Wall is $found there\n" ;
bbc7dcd2 1225
6ca2e664
PM
1226 undef $x ;
1227 untie %h ;
1228
1229prints this
1230
2c2d71f5 1231 Larry Wall is there
6ca2e664
PM
1232 Harry Wall is not there
1233
1234
1235=head2 The del_dup() Method
1236
1237 $status = $X->del_dup($key, $value) ;
1238
1239This method deletes a specific key/value pair. It returns
12400 if they exist and have been deleted successfully.
1241Otherwise the method returns a non-zero value.
1242
b90e71be 1243Again assuming the existence of the C<tree> database
6ca2e664 1244
3245f058 1245 use warnings ;
6ca2e664
PM
1246 use strict ;
1247 use DB_File ;
bbc7dcd2 1248
962cee9f 1249 my ($filename, $x, %h, $found) ;
6ca2e664 1250
07200f1b 1251 $filename = "tree" ;
bbc7dcd2 1252
6ca2e664
PM
1253 # Enable duplicate records
1254 $DB_BTREE->{'flags'} = R_DUP ;
bbc7dcd2 1255
45a340cb 1256 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
6ca2e664
PM
1257 or die "Cannot open $filename: $!\n";
1258
1259 $x->del_dup("Wall", "Larry") ;
1260
1261 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ;
1262 print "Larry Wall is $found there\n" ;
bbc7dcd2 1263
6ca2e664
PM
1264 undef $x ;
1265 untie %h ;
1266
1267prints this
1268
1269 Larry Wall is not there
1270
f6b705ef 1271=head2 Matching Partial Keys
1272
1273The BTREE interface has a feature which allows partial keys to be
1274matched. This functionality is I<only> available when the C<seq> method
1275is used along with the R_CURSOR flag.
1276
1277 $x->seq($key, $value, R_CURSOR) ;
1278
1279Here is the relevant quote from the dbopen man page where it defines
1280the use of the R_CURSOR flag with seq:
1281
f6b705ef 1282 Note, for the DB_BTREE access method, the returned key is not
1283 necessarily an exact match for the specified key. The returned key
1284 is the smallest key greater than or equal to the specified key,
1285 permitting partial key matches and range searches.
1286
f6b705ef 1287In the example script below, the C<match> sub uses this feature to find
1288and print the first matching key/value pair given a partial key.
1289
3245f058 1290 use warnings ;
610ab055 1291 use strict ;
f6b705ef 1292 use DB_File ;
1293 use Fcntl ;
610ab055 1294
962cee9f 1295 my ($filename, $x, %h, $st, $key, $value) ;
f6b705ef 1296
1297 sub match
1298 {
1299 my $key = shift ;
610ab055 1300 my $value = 0;
f6b705ef 1301 my $orig_key = $key ;
1302 $x->seq($key, $value, R_CURSOR) ;
1303 print "$orig_key\t-> $key\t-> $value\n" ;
1304 }
1305
1306 $filename = "tree" ;
1307 unlink $filename ;
1308
45a340cb 1309 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE
f6b705ef 1310 or die "Cannot open $filename: $!\n";
bbc7dcd2 1311
f6b705ef 1312 # Add some key/value pairs to the file
1313 $h{'mouse'} = 'mickey' ;
1314 $h{'Wall'} = 'Larry' ;
1315 $h{'Walls'} = 'Brick' ;
1316 $h{'Smith'} = 'John' ;
bbc7dcd2 1317
f6b705ef 1318
610ab055 1319 $key = $value = 0 ;
f6b705ef 1320 print "IN ORDER\n" ;
1321 for ($st = $x->seq($key, $value, R_FIRST) ;
1322 $st == 0 ;
1323 $st = $x->seq($key, $value, R_NEXT) )
bbc7dcd2 1324
2c2d71f5 1325 { print "$key -> $value\n" }
bbc7dcd2 1326
f6b705ef 1327 print "\nPARTIAL MATCH\n" ;
1328
1329 match "Wa" ;
1330 match "A" ;
1331 match "a" ;
1332
1333 undef $x ;
1334 untie %h ;
1335
1336Here is the output:
1337
1338 IN ORDER
1339 Smith -> John
1340 Wall -> Larry
1341 Walls -> Brick
1342 mouse -> mickey
1343
1344 PARTIAL MATCH
1345 Wa -> Wall -> Larry
1346 A -> Smith -> John
1347 a -> mouse -> mickey
1348
1349=head1 DB_RECNO
1350
1351DB_RECNO provides an interface to flat text files. Both variable and
1352fixed length records are supported.
3b35bae3 1353
6ca2e664 1354In order to make RECNO more compatible with Perl, the array offset for
88108326 1355all RECNO arrays begins at 0 rather than 1 as in Berkeley DB.
3b35bae3 1356
88108326 1357As with normal Perl arrays, a RECNO array can be accessed using
1358negative indexes. The index -1 refers to the last element of the array,
1359-2 the second last, and so on. Attempting to access an element before
1360the start of the array will raise a fatal run-time error.
3b35bae3 1361
68dc0745 1362=head2 The 'bval' Option
36477c24 1363
1364The operation of the bval option warrants some discussion. Here is the
1365definition of bval from the Berkeley DB 1.85 recno manual page:
1366
1367 The delimiting byte to be used to mark the end of a
1368 record for variable-length records, and the pad charac-
1369 ter for fixed-length records. If no value is speci-
1370 fied, newlines (``\n'') are used to mark the end of
1371 variable-length records and fixed-length records are
1372 padded with spaces.
1373
1374The second sentence is wrong. In actual fact bval will only default to
1375C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL
1376openinfo parameter is used at all, the value that happens to be in bval
1377will be used. That means you always have to specify bval when making
1378use of any of the options in the openinfo parameter. This documentation
1379error will be fixed in the next release of Berkeley DB.
1380
1381That clarifies the situation with regards Berkeley DB itself. What
1382about B<DB_File>? Well, the behavior defined in the quote above is
6ca2e664 1383quite useful, so B<DB_File> conforms to it.
36477c24 1384
1385That means that you can specify other options (e.g. cachesize) and
1386still have bval default to C<"\n"> for variable length records, and
1387space for fixed length records.
1388
c5da4faf 1389Also note that the bval option only allows you to specify a single byte
a6d6498e 1390as a delimiter.
c5da4faf 1391
f6b705ef 1392=head2 A Simple Example
3b35bae3 1393
6ca2e664
PM
1394Here is a simple example that uses RECNO (if you are using a version
1395of Perl earlier than 5.004_57 this example won't work -- see
1396L<Extra RECNO Methods> for a workaround).
f6b705ef 1397
3245f058 1398 use warnings ;
610ab055 1399 use strict ;
f6b705ef 1400 use DB_File ;
f6b705ef 1401
2c2d71f5
JH
1402 my $filename = "text" ;
1403 unlink $filename ;
1404
610ab055 1405 my @h ;
45a340cb 1406 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1407 or die "Cannot open file 'text': $!\n" ;
1408
1409 # Add a few key/value pairs to the file
1410 $h[0] = "orange" ;
1411 $h[1] = "blue" ;
1412 $h[2] = "yellow" ;
1413
6ca2e664
PM
1414 push @h, "green", "black" ;
1415
1416 my $elements = scalar @h ;
1417 print "The array contains $elements entries\n" ;
1418
1419 my $last = pop @h ;
1420 print "popped $last\n" ;
1421
1422 unshift @h, "white" ;
1423 my $first = shift @h ;
1424 print "shifted $first\n" ;
1425
f6b705ef 1426 # Check for existence of a key
1427 print "Element 1 Exists with value $h[1]\n" if $h[1] ;
1428
1429 # use a negative index
1430 print "The last element is $h[-1]\n" ;
1431 print "The 2nd last element is $h[-2]\n" ;
1432
1433 untie @h ;
3b35bae3 1434
f6b705ef 1435Here is the output from the script:
1436
6ca2e664
PM
1437 The array contains 5 entries
1438 popped black
2c2d71f5 1439 shifted white
f6b705ef 1440 Element 1 Exists with value blue
6ca2e664
PM
1441 The last element is green
1442 The 2nd last element is yellow
f6b705ef 1443
6ca2e664 1444=head2 Extra RECNO Methods
f6b705ef 1445
045291aa 1446If you are using a version of Perl earlier than 5.004_57, the tied
6ca2e664
PM
1447array interface is quite limited. In the example script above
1448C<push>, C<pop>, C<shift>, C<unshift>
1449or determining the array length will not work with a tied array.
045291aa
PM
1450
1451To make the interface more useful for older versions of Perl, a number
1452of methods are supplied with B<DB_File> to simulate the missing array
1453operations. All these methods are accessed via the object returned from
1454the tie call.
f6b705ef 1455
1456Here are the methods:
1457
1458=over 5
3b35bae3 1459
f6b705ef 1460=item B<$X-E<gt>push(list) ;>
1461
1462Pushes the elements of C<list> to the end of the array.
1463
1464=item B<$value = $X-E<gt>pop ;>
1465
1466Removes and returns the last element of the array.
1467
1468=item B<$X-E<gt>shift>
1469
1470Removes and returns the first element of the array.
1471
1472=item B<$X-E<gt>unshift(list) ;>
1473
1474Pushes the elements of C<list> to the start of the array.
1475
1476=item B<$X-E<gt>length>
1477
1478Returns the number of elements in the array.
1479
c5da4faf
PM
1480=item B<$X-E<gt>splice(offset, length, elements);>
1481
a6d05634 1482Returns a splice of the array.
c5da4faf 1483
f6b705ef 1484=back
1485
1486=head2 Another Example
1487
1488Here is a more complete example that makes use of some of the methods
1489described above. It also makes use of the API interface directly (see
1490L<THE API INTERFACE>).
1491
3245f058 1492 use warnings ;
f6b705ef 1493 use strict ;
962cee9f 1494 my (@h, $H, $file, $i) ;
f6b705ef 1495 use DB_File ;
1496 use Fcntl ;
bbc7dcd2 1497
f6b705ef 1498 $file = "text" ;
1499
1500 unlink $file ;
1501
45a340cb 1502 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO
f6b705ef 1503 or die "Cannot open file $file: $!\n" ;
bbc7dcd2 1504
f6b705ef 1505 # first create a text file to play with
1506 $h[0] = "zero" ;
1507 $h[1] = "one" ;
1508 $h[2] = "two" ;
1509 $h[3] = "three" ;
1510 $h[4] = "four" ;
1511
bbc7dcd2 1512
f6b705ef 1513 # Print the records in order.
1514 #
1515 # The length method is needed here because evaluating a tied
1516 # array in a scalar context does not return the number of
1517 # elements in the array.
1518
1519 print "\nORIGINAL\n" ;
1520 foreach $i (0 .. $H->length - 1) {
1521 print "$i: $h[$i]\n" ;
1522 }
1523
1524 # use the push & pop methods
1525 $a = $H->pop ;
1526 $H->push("last") ;
1527 print "\nThe last record was [$a]\n" ;
1528
1529 # and the shift & unshift methods
1530 $a = $H->shift ;
1531 $H->unshift("first") ;
1532 print "The first record was [$a]\n" ;
1533
1534 # Use the API to add a new record after record 2.
1535 $i = 2 ;
1536 $H->put($i, "Newbie", R_IAFTER) ;
1537
1538 # and a new record before record 1.
1539 $i = 1 ;
1540 $H->put($i, "New One", R_IBEFORE) ;
1541
1542 # delete record 3
1543 $H->del(3) ;
1544
1545 # now print the records in reverse order
1546 print "\nREVERSE\n" ;
1547 for ($i = $H->length - 1 ; $i >= 0 ; -- $i)
1548 { print "$i: $h[$i]\n" }
1549
1550 # same again, but use the API functions instead
1551 print "\nREVERSE again\n" ;
610ab055 1552 my ($s, $k, $v) = (0, 0, 0) ;
f6b705ef 1553 for ($s = $H->seq($k, $v, R_LAST) ;
1554 $s == 0 ;
1555 $s = $H->seq($k, $v, R_PREV))
1556 { print "$k: $v\n" }
1557
1558 undef $H ;
1559 untie @h ;
1560
1561and this is what it outputs:
1562
1563 ORIGINAL
1564 0: zero
1565 1: one
1566 2: two
1567 3: three
1568 4: four
1569
1570 The last record was [four]
1571 The first record was [zero]
1572
1573 REVERSE
1574 5: last
1575 4: three
1576 3: Newbie
1577 2: one
1578 1: New One
1579 0: first
1580
1581 REVERSE again
1582 5: last
1583 4: three
1584 3: Newbie
1585 2: one
1586 1: New One
1587 0: first
1588
1589Notes:
1590
1591=over 5
1592
1593=item 1.
1594
1595Rather than iterating through the array, C<@h> like this:
1596
1597 foreach $i (@h)
1598
1599it is necessary to use either this:
1600
1601 foreach $i (0 .. $H->length - 1)
1602
1603or this:
1604
1605 for ($a = $H->get($k, $v, R_FIRST) ;
1606 $a == 0 ;
1607 $a = $H->get($k, $v, R_NEXT) )
1608
1609=item 2.
1610
1611Notice that both times the C<put> method was used the record index was
1612specified using a variable, C<$i>, rather than the literal value
1613itself. This is because C<put> will return the record number of the
1614inserted line via that parameter.
1615
1616=back
1617
1618=head1 THE API INTERFACE
3b35bae3
AD
1619
1620As well as accessing Berkeley DB using a tied hash or array, it is also
88108326 1621possible to make direct use of most of the API functions defined in the
8e07c86e 1622Berkeley DB documentation.
3b35bae3 1623
88108326 1624To do this you need to store a copy of the object returned from the tie.
3b35bae3 1625
88108326 1626 $db = tie %hash, "DB_File", "filename" ;
3b35bae3 1627
8e07c86e 1628Once you have done that, you can access the Berkeley DB API functions
88108326 1629as B<DB_File> methods directly like this:
3b35bae3
AD
1630
1631 $db->put($key, $value, R_NOOVERWRITE) ;
1632
88108326 1633B<Important:> If you have saved a copy of the object returned from
1634C<tie>, the underlying database file will I<not> be closed until both
1635the tied variable is untied and all copies of the saved object are
610ab055 1636destroyed.
88108326 1637
1638 use DB_File ;
1639 $db = tie %hash, "DB_File", "filename"
1640 or die "Cannot tie filename: $!" ;
1641 ...
1642 undef $db ;
1643 untie %hash ;
1644
9a2c4ce3 1645See L<The untie() Gotcha> for more details.
778183f3 1646
88108326 1647All the functions defined in L<dbopen> are available except for
1648close() and dbopen() itself. The B<DB_File> method interface to the
1649supported functions have been implemented to mirror the way Berkeley DB
1650works whenever possible. In particular note that:
1651
1652=over 5
1653
1654=item *
1655
1656The methods return a status value. All return 0 on success.
1657All return -1 to signify an error and set C<$!> to the exact
1658error code. The return code 1 generally (but not always) means that the
1659key specified did not exist in the database.
1660
1661Other return codes are defined. See below and in the Berkeley DB
1662documentation for details. The Berkeley DB documentation should be used
1663as the definitive source.
1664
1665=item *
3b35bae3 1666
88108326 1667Whenever a Berkeley DB function returns data via one of its parameters,
1668the equivalent B<DB_File> method does exactly the same.
3b35bae3 1669
88108326 1670=item *
1671
1672If you are careful, it is possible to mix API calls with the tied
1673hash/array interface in the same piece of code. Although only a few of
1674the methods used to implement the tied interface currently make use of
1675the cursor, you should always assume that the cursor has been changed
1676any time the tied hash/array interface is used. As an example, this
1677code will probably not do what you expect:
1678
1679 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1680 or die "Cannot tie $filename: $!" ;
1681
1682 # Get the first key/value pair and set the cursor
1683 $X->seq($key, $value, R_FIRST) ;
1684
1685 # this line will modify the cursor
1686 $count = scalar keys %x ;
1687
1688 # Get the second key/value pair.
1689 # oops, it didn't, it got the last key/value pair!
1690 $X->seq($key, $value, R_NEXT) ;
1691
1692The code above can be rearranged to get around the problem, like this:
1693
1694 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE
1695 or die "Cannot tie $filename: $!" ;
1696
1697 # this line will modify the cursor
1698 $count = scalar keys %x ;
1699
1700 # Get the first key/value pair and set the cursor
1701 $X->seq($key, $value, R_FIRST) ;
1702
1703 # Get the second key/value pair.
1704 # worked this time.
1705 $X->seq($key, $value, R_NEXT) ;
1706
1707=back
1708
1709All the constants defined in L<dbopen> for use in the flags parameters
1710in the methods defined below are also available. Refer to the Berkeley
1711DB documentation for the precise meaning of the flags values.
1712
1713Below is a list of the methods available.
3b35bae3
AD
1714
1715=over 5
1716
f6b705ef 1717=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;>
88108326 1718
1719Given a key (C<$key>) this method reads the value associated with it
1720from the database. The value read from the database is returned in the
1721C<$value> parameter.
3b35bae3 1722
88108326 1723If the key does not exist the method returns 1.
3b35bae3 1724
88108326 1725No flags are currently defined for this method.
3b35bae3 1726
f6b705ef 1727=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;>
3b35bae3 1728
88108326 1729Stores the key/value pair in the database.
1730
1731If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter
8e07c86e 1732will have the record number of the inserted key/value pair set.
3b35bae3 1733
88108326 1734Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and
1735R_SETCURSOR.
1736
f6b705ef 1737=item B<$status = $X-E<gt>del($key [, $flags]) ;>
3b35bae3 1738
88108326 1739Removes all key/value pairs with key C<$key> from the database.
3b35bae3 1740
88108326 1741A return code of 1 means that the requested key was not in the
1742database.
3b35bae3 1743
88108326 1744R_CURSOR is the only valid flag at present.
3b35bae3 1745
f6b705ef 1746=item B<$status = $X-E<gt>fd ;>
3b35bae3 1747
88108326 1748Returns the file descriptor for the underlying database.
3b35bae3 1749
b90e71be
GS
1750See L<Locking: The Trouble with fd> for an explanation for why you should
1751not use C<fd> to lock your database.
3b35bae3 1752
f6b705ef 1753=item B<$status = $X-E<gt>seq($key, $value, $flags) ;>
3b35bae3 1754
88108326 1755This interface allows sequential retrieval from the database. See
1756L<dbopen> for full details.
1757
1758Both the C<$key> and C<$value> parameters will be set to the key/value
1759pair read from the database.
1760
1761The flags parameter is mandatory. The valid flag values are R_CURSOR,
1762R_FIRST, R_LAST, R_NEXT and R_PREV.
1763
f6b705ef 1764=item B<$status = $X-E<gt>sync([$flags]) ;>
88108326 1765
1766Flushes any cached buffers to disk.
1767
1768R_RECNOSYNC is the only valid flag at present.
3b35bae3
AD
1769
1770=back
1771
cad2e5aa
JH
1772=head1 DBM FILTERS
1773
1774A DBM Filter is a piece of code that is be used when you I<always>
1775want to make the same transformation to all keys and/or values in a
1776DBM database.
1777
1778There are four methods associated with DBM Filters. All work identically,
1779and each is used to install (or uninstall) a single DBM Filter. Each
1780expects a single parameter, namely a reference to a sub. The only
1781difference between them is the place that the filter is installed.
1782
1783To summarise:
1784
1785=over 5
1786
1787=item B<filter_store_key>
1788
1789If a filter has been installed with this method, it will be invoked
1790every time you write a key to a DBM database.
1791
1792=item B<filter_store_value>
1793
1794If a filter has been installed with this method, it will be invoked
1795every time you write a value to a DBM database.
1796
1797
1798=item B<filter_fetch_key>
1799
1800If a filter has been installed with this method, it will be invoked
1801every time you read a key from a DBM database.
1802
1803=item B<filter_fetch_value>
1804
1805If a filter has been installed with this method, it will be invoked
1806every time you read a value from a DBM database.
1807
1808=back
1809
1810You can use any combination of the methods, from none, to all four.
1811
1812All filter methods return the existing filter, if present, or C<undef>
1813in not.
1814
1815To delete a filter pass C<undef> to it.
1816
1817=head2 The Filter
1818
1819When each filter is called by Perl, a local copy of C<$_> will contain
1820the key or value to be filtered. Filtering is achieved by modifying
1821the contents of C<$_>. The return code from the filter is ignored.
1822
1823=head2 An Example -- the NULL termination problem.
1824
1825Consider the following scenario. You have a DBM database
1826that you need to share with a third-party C application. The C application
1827assumes that I<all> keys and values are NULL terminated. Unfortunately
1828when Perl writes to DBM databases it doesn't use NULL termination, so
1829your Perl application will have to manage NULL termination itself. When
1830you write to the database you will have to use something like this:
1831
1832 $hash{"$key\0"} = "$value\0" ;
1833
1834Similarly the NULL needs to be taken into account when you are considering
1835the length of existing keys/values.
1836
1837It would be much better if you could ignore the NULL terminations issue
1838in the main application code and have a mechanism that automatically
1839added the terminating NULL to all keys and values whenever you write to
1840the database and have them removed when you read from the database. As I'm
1841sure you have already guessed, this is a problem that DBM Filters can
1842fix very easily.
1843
3245f058 1844 use warnings ;
cad2e5aa
JH
1845 use strict ;
1846 use DB_File ;
1847
1848 my %hash ;
2359510d 1849 my $filename = "filt" ;
cad2e5aa
JH
1850 unlink $filename ;
1851
1852 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1853 or die "Cannot open $filename: $!\n" ;
1854
1855 # Install DBM Filters
1856 $db->filter_fetch_key ( sub { s/\0$// } ) ;
1857 $db->filter_store_key ( sub { $_ .= "\0" } ) ;
1858 $db->filter_fetch_value( sub { s/\0$// } ) ;
1859 $db->filter_store_value( sub { $_ .= "\0" } ) ;
1860
1861 $hash{"abc"} = "def" ;
1862 my $a = $hash{"ABC"} ;
1863 # ...
1864 undef $db ;
1865 untie %hash ;
1866
1867Hopefully the contents of each of the filters should be
1868self-explanatory. Both "fetch" filters remove the terminating NULL,
1869and both "store" filters add a terminating NULL.
1870
1871
1872=head2 Another Example -- Key is a C int.
1873
1874Here is another real-life example. By default, whenever Perl writes to
1875a DBM database it always writes the key and value as strings. So when
1876you use this:
1877
3c4b39be 1878 $hash{12345} = "something" ;
cad2e5aa
JH
1879
1880the key 12345 will get stored in the DBM database as the 5 byte string
1881"12345". If you actually want the key to be stored in the DBM database
1882as a C int, you will have to use C<pack> when writing, and C<unpack>
1883when reading.
1884
1885Here is a DBM Filter that does it:
1886
3245f058 1887 use warnings ;
cad2e5aa
JH
1888 use strict ;
1889 use DB_File ;
1890 my %hash ;
2359510d 1891 my $filename = "filt" ;
cad2e5aa
JH
1892 unlink $filename ;
1893
1894
1895 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH
1896 or die "Cannot open $filename: $!\n" ;
1897
1898 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ;
1899 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ;
1900 $hash{123} = "def" ;
1901 # ...
1902 undef $db ;
1903 untie %hash ;
1904
1905This time only two filters have been used -- we only need to manipulate
1906the contents of the key, so it wasn't necessary to install any value
1907filters.
1908
f6b705ef 1909=head1 HINTS AND TIPS
3b35bae3 1910
3b35bae3 1911
b90e71be 1912=head2 Locking: The Trouble with fd
3b35bae3 1913
b90e71be
GS
1914Until version 1.72 of this module, the recommended technique for locking
1915B<DB_File> databases was to flock the filehandle returned from the "fd"
1916function. Unfortunately this technique has been shown to be fundamentally
1917flawed (Kudos to David Harris for tracking this down). Use it at your own
1918peril!
3b35bae3 1919
b90e71be 1920The locking technique went like this.
cb1a09d0 1921
2359510d
SD
1922 $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644)
1923 || die "dbcreat foo.db $!";
b90e71be
GS
1924 $fd = $db->fd;
1925 open(DB_FH, "+<&=$fd") || die "dup $!";
1926 flock (DB_FH, LOCK_EX) || die "flock: $!";
1927 ...
1928 $db{"Tom"} = "Jerry" ;
1929 ...
1930 flock(DB_FH, LOCK_UN);
1931 undef $db;
1932 untie %db;
1933 close(DB_FH);
cb1a09d0 1934
b90e71be 1935In simple terms, this is what happens:
cb1a09d0 1936
b90e71be 1937=over 5
cb1a09d0 1938
b90e71be 1939=item 1.
cb1a09d0 1940
b90e71be 1941Use "tie" to open the database.
cb1a09d0 1942
b90e71be 1943=item 2.
cb1a09d0 1944
b90e71be 1945Lock the database with fd & flock.
cb1a09d0 1946
b90e71be 1947=item 3.
cb1a09d0 1948
b90e71be 1949Read & Write to the database.
cb1a09d0 1950
b90e71be 1951=item 4.
cb1a09d0 1952
b90e71be 1953Unlock and close the database.
cb1a09d0 1954
b90e71be
GS
1955=back
1956
1957Here is the crux of the problem. A side-effect of opening the B<DB_File>
1958database in step 2 is that an initial block from the database will get
1959read from disk and cached in memory.
1960
1961To see why this is a problem, consider what can happen when two processes,
1962say "A" and "B", both want to update the same B<DB_File> database
1963using the locking steps outlined above. Assume process "A" has already
1964opened the database and has a write lock, but it hasn't actually updated
1965the database yet (it has finished step 2, but not started step 3 yet). Now
1966process "B" tries to open the same database - step 1 will succeed,
1967but it will block on step 2 until process "A" releases the lock. The
1968important thing to notice here is that at this point in time both
1969processes will have cached identical initial blocks from the database.
1970
1971Now process "A" updates the database and happens to change some of the
1972data held in the initial buffer. Process "A" terminates, flushing
1973all cached data to disk and releasing the database lock. At this point
1974the database on disk will correctly reflect the changes made by process
1975"A".
1976
1977With the lock released, process "B" can now continue. It also updates the
1978database and unfortunately it too modifies the data that was in its
1979initial buffer. Once that data gets flushed to disk it will overwrite
1980some/all of the changes process "A" made to the database.
1981
1982The result of this scenario is at best a database that doesn't contain
1983what you expect. At worst the database will corrupt.
1984
1985The above won't happen every time competing process update the same
1986B<DB_File> database, but it does illustrate why the technique should
1987not be used.
1988
1989=head2 Safe ways to lock a database
1990
1991Starting with version 2.x, Berkeley DB has internal support for locking.
1992The companion module to this one, B<BerkeleyDB>, provides an interface
1993to this locking functionality. If you are serious about locking
1994Berkeley DB databases, I strongly recommend using B<BerkeleyDB>.
1995
1996If using B<BerkeleyDB> isn't an option, there are a number of modules
1997available on CPAN that can be used to implement locking. Each one
1998implements locking differently and has different goals in mind. It is
1999therefore worth knowing the difference, so that you can pick the right
2000one for your application. Here are the three locking wrappers:
2001
2002=over 5
2003
2004=item B<Tie::DB_Lock>
2005
2006A B<DB_File> wrapper which creates copies of the database file for
2007read access, so that you have a kind of a multiversioning concurrent read
2008system. However, updates are still serial. Use for databases where reads
2009may be lengthy and consistency problems may occur.
2010
2011=item B<Tie::DB_LockFile>
2012
2013A B<DB_File> wrapper that has the ability to lock and unlock the database
2014while it is being used. Avoids the tie-before-flock problem by simply
2015re-tie-ing the database when you get or drop a lock. Because of the
2016flexibility in dropping and re-acquiring the lock in the middle of a
2017session, this can be massaged into a system that will work with long
2018updates and/or reads if the application follows the hints in the POD
2019documentation.
2020
2021=item B<DB_File::Lock>
2022
2023An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile
2024before tie-ing the database and drops the lock after the untie. Allows
2025one to use the same lockfile for multiple databases to avoid deadlock
2026problems, if desired. Use for databases where updates are reads are
2027quick and simple flock locking semantics are enough.
2028
2029=back
cb1a09d0 2030
68dc0745 2031=head2 Sharing Databases With C Applications
f6b705ef 2032
2033There is no technical reason why a Berkeley DB database cannot be
2034shared by both a Perl and a C application.
2035
2036The vast majority of problems that are reported in this area boil down
2037to the fact that C strings are NULL terminated, whilst Perl strings are
cad2e5aa 2038not. See L<DBM FILTERS> for a generic way to work around this problem.
f6b705ef 2039
2040Here is a real example. Netscape 2.0 keeps a record of the locations you
2041visit along with the time you last visited them in a DB_HASH database.
2042This is usually stored in the file F<~/.netscape/history.db>. The key
2043field in the database is the location string and the value field is the
2044time the location was last visited stored as a 4 byte binary value.
2045
2046If you haven't already guessed, the location string is stored with a
2047terminating NULL. This means you need to be careful when accessing the
2048database.
2049
2050Here is a snippet of code that is loosely based on Tom Christiansen's
2051I<ggh> script (available from your nearest CPAN archive in
2052F<authors/id/TOMC/scripts/nshist.gz>).
2053
3245f058 2054 use warnings ;
610ab055 2055 use strict ;
f6b705ef 2056 use DB_File ;
2057 use Fcntl ;
f6b705ef 2058
962cee9f 2059 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ;
f6b705ef 2060 $dotdir = $ENV{HOME} || $ENV{LOGNAME};
2061
2062 $HISTORY = "$dotdir/.netscape/history.db";
2063
2064 tie %hist_db, 'DB_File', $HISTORY
2065 or die "Cannot open $HISTORY: $!\n" ;;
2066
2067 # Dump the complete database
2068 while ( ($href, $binary_time) = each %hist_db ) {
2069
2070 # remove the terminating NULL
2071 $href =~ s/\x00$// ;
2072
2073 # convert the binary time into a user friendly string
2074 $date = localtime unpack("V", $binary_time);
2075 print "$date $href\n" ;
2076 }
2077
2078 # check for the existence of a specific key
2079 # remember to add the NULL
2080 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) {
2081 $date = localtime unpack("V", $binary_time) ;
2082 print "Last visited mox.perl.com on $date\n" ;
2083 }
2084 else {
2085 print "Never visited mox.perl.com\n"
2086 }
2087
2088 untie %hist_db ;
2089
68dc0745 2090=head2 The untie() Gotcha
778183f3 2091
7a2e2cd6 2092If you make use of the Berkeley DB API, it is I<very> strongly
68dc0745 2093recommended that you read L<perltie/The untie Gotcha>.
778183f3
PM
2094
2095Even if you don't currently make use of the API interface, it is still
2096worth reading it.
2097
2098Here is an example which illustrates the problem from a B<DB_File>
2099perspective:
2100
2101 use DB_File ;
2102 use Fcntl ;
2103
2104 my %x ;
2105 my $X ;
2106
2107 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC
2108 or die "Cannot tie first time: $!" ;
2109
2110 $x{123} = 456 ;
2111
2112 untie %x ;
2113
2114 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2115 or die "Cannot tie second time: $!" ;
2116
2117 untie %x ;
2118
2119When run, the script will produce this error message:
2120
2121 Cannot tie second time: Invalid argument at bad.file line 14.
2122
2123Although the error message above refers to the second tie() statement
2124in the script, the source of the problem is really with the untie()
2125statement that precedes it.
2126
2127Having read L<perltie> you will probably have already guessed that the
2128error is caused by the extra copy of the tied object stored in C<$X>.
2129If you haven't, then the problem boils down to the fact that the
2130B<DB_File> destructor, DESTROY, will not be called until I<all>
2131references to the tied object are destroyed. Both the tied variable,
2132C<%x>, and C<$X> above hold a reference to the object. The call to
2133untie() will destroy the first, but C<$X> still holds a valid
2134reference, so the destructor will not get called and the database file
2135F<tst.fil> will remain open. The fact that Berkeley DB then reports the
b90e71be 2136attempt to open a database that is already open via the catch-all
778183f3
PM
2137"Invalid argument" doesn't help.
2138
2139If you run the script with the C<-w> flag the error message becomes:
2140
2141 untie attempted while 1 inner references still exist at bad.file line 12.
2142 Cannot tie second time: Invalid argument at bad.file line 14.
2143
2144which pinpoints the real problem. Finally the script can now be
2145modified to fix the original problem by destroying the API object
2146before the untie:
2147
2148 ...
2149 $x{123} = 456 ;
2150
2151 undef $X ;
2152 untie %x ;
2153
2154 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT
2155 ...
2156
f6b705ef 2157
2158=head1 COMMON QUESTIONS
2159
2160=head2 Why is there Perl source in my database?
2161
2162If you look at the contents of a database file created by DB_File,
2163there can sometimes be part of a Perl script included in it.
2164
2165This happens because Berkeley DB uses dynamic memory to allocate
2166buffers which will subsequently be written to the database file. Being
2167dynamic, the memory could have been used for anything before DB
2168malloced it. As Berkeley DB doesn't clear the memory once it has been
2169allocated, the unused portions will contain random junk. In the case
2170where a Perl script gets written to the database, the random junk will
2171correspond to an area of dynamic memory that happened to be used during
2172the compilation of the script.
2173
2174Unless you don't like the possibility of there being part of your Perl
2175scripts embedded in a database file, this is nothing to worry about.
2176
2177=head2 How do I store complex data structures with DB_File?
2178
2179Although B<DB_File> cannot do this directly, there is a module which
2180can layer transparently over B<DB_File> to accomplish this feat.
2181
2182Check out the MLDBM module, available on CPAN in the directory
2183F<modules/by-module/MLDBM>.
2184
2185=head2 What does "Invalid Argument" mean?
2186
2187You will get this error message when one of the parameters in the
2188C<tie> call is wrong. Unfortunately there are quite a few parameters to
2189get wrong, so it can be difficult to figure out which one it is.
2190
2191Here are a couple of possibilities:
2192
2193=over 5
2194
2195=item 1.
2196
610ab055 2197Attempting to reopen a database without closing it.
f6b705ef 2198
2199=item 2.
2200
2201Using the O_WRONLY flag.
2202
2203=back
2204
2205=head2 What does "Bareword 'DB_File' not allowed" mean?
2206
2207You will encounter this particular error message when you have the
2208C<strict 'subs'> pragma (or the full strict pragma) in your script.
2209Consider this script:
2210
3245f058 2211 use warnings ;
f6b705ef 2212 use strict ;
2213 use DB_File ;
07200f1b 2214 my %x ;
f6b705ef 2215 tie %x, DB_File, "filename" ;
2216
2217Running it produces the error in question:
2218
2219 Bareword "DB_File" not allowed while "strict subs" in use
2220
2221To get around the error, place the word C<DB_File> in either single or
2222double quotes, like this:
2223
2224 tie %x, "DB_File", "filename" ;
2225
2226Although it might seem like a real pain, it is really worth the effort
2227of having a C<use strict> in all your scripts.
2228
cad2e5aa
JH
2229=head1 REFERENCES
2230
2231Articles that are either about B<DB_File> or make use of it.
2232
2233=over 5
2234
2235=item 1.
2236
2237I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com),
2238Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41
2239
2240=back
2241
cb1a09d0
AD
2242=head1 HISTORY
2243
1f70e1ea 2244Moved to the Changes file.
610ab055 2245
1f70e1ea 2246=head1 BUGS
05475680 2247
1f70e1ea
PM
2248Some older versions of Berkeley DB had problems with fixed length
2249records using the RECNO file format. This problem has been fixed since
2250version 1.85 of Berkeley DB.
e858de61 2251
1f70e1ea
PM
2252I am sure there are bugs in the code. If you do find any, or can
2253suggest any enhancements, I would welcome your comments.
a6ed719b 2254
1f70e1ea 2255=head1 AVAILABILITY
a6ed719b 2256
1f70e1ea
PM
2257B<DB_File> comes with the standard Perl source distribution. Look in
2258the directory F<ext/DB_File>. Given the amount of time between releases
2259of Perl the version that ships with Perl is quite likely to be out of
2260date, so the most recent version can always be found on CPAN (see
5bbd4290 2261L<perlmodlib/CPAN> for details), in the directory
1f70e1ea 2262F<modules/by-module/DB_File>.
a6ed719b 2263
039d031f
PM
2264This version of B<DB_File> will work with either version 1.x, 2.x or
22653.x of Berkeley DB, but is limited to the functionality provided by
2266version 1.
a6ed719b 2267
10261a36 2268The official web site for Berkeley DB is F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>.
039d031f 2269All versions of Berkeley DB are available there.
93af7a87 2270
1f70e1ea
PM
2271Alternatively, Berkeley DB version 1 is available at your nearest CPAN
2272archive in F<src/misc/db.1.85.tar.gz>.
e858de61 2273
1f70e1ea 2274=head1 COPYRIGHT
3b35bae3 2275
acb29889 2276Copyright (c) 1995-2012 Paul Marquess. All rights reserved. This program
a9fd575d
PM
2277is free software; you can redistribute it and/or modify it under the
2278same terms as Perl itself.
3b35bae3 2279
1f70e1ea
PM
2280Although B<DB_File> is covered by the Perl license, the library it
2281makes use of, namely Berkeley DB, is not. Berkeley DB has its own
2282copyright and its own license. Please take the time to read it.
3b35bae3 2283
10af739e 2284Here are a few words taken from the Berkeley DB FAQ (at
10261a36 2285F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>) regarding the license:
68dc0745 2286
a9fd575d 2287 Do I have to license DB to use it in Perl scripts?
3b35bae3 2288
a9fd575d
PM
2289 No. The Berkeley DB license requires that software that uses
2290 Berkeley DB be freely redistributable. In the case of Perl, that
2291 software is Perl, and not your scripts. Any Perl scripts that you
2292 write are your property, including scripts that make use of
2293 Berkeley DB. Neither the Perl license nor the Berkeley DB license
2294 place any restriction on what you may do with them.
88108326 2295
1f70e1ea
PM
2296If you are in any doubt about the license situation, contact either the
2297Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details.
a0b8c8c1
PM
2298
2299
3b35bae3
AD
2300=head1 SEE ALSO
2301
5bbd4290
PM
2302L<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>,
2303L<perldbmfilter>
3b35bae3 2304
3b35bae3
AD
2305=head1 AUTHOR
2306
8e07c86e 2307The DB_File interface was written by Paul Marquess
5bbd4290 2308E<lt>pmqs@cpan.orgE<gt>.
3b35bae3
AD
2309
2310=cut