| 1 | # DB_File.pm -- Perl 5 interface to Berkeley DB |
| 2 | # |
| 3 | # written by Paul Marquess (Paul.Marquess@btinternet.com) |
| 4 | # last modified 17th December 2000 |
| 5 | # version 1.75 |
| 6 | # |
| 7 | # Copyright (c) 1995-2000 Paul Marquess. All rights reserved. |
| 8 | # This program is free software; you can redistribute it and/or |
| 9 | # modify it under the same terms as Perl itself. |
| 10 | |
| 11 | |
| 12 | package DB_File::HASHINFO ; |
| 13 | |
| 14 | require 5.003 ; |
| 15 | |
| 16 | use warnings; |
| 17 | use strict; |
| 18 | use Carp; |
| 19 | require Tie::Hash; |
| 20 | @DB_File::HASHINFO::ISA = qw(Tie::Hash); |
| 21 | |
| 22 | sub new |
| 23 | { |
| 24 | my $pkg = shift ; |
| 25 | my %x ; |
| 26 | tie %x, $pkg ; |
| 27 | bless \%x, $pkg ; |
| 28 | } |
| 29 | |
| 30 | |
| 31 | sub TIEHASH |
| 32 | { |
| 33 | my $pkg = shift ; |
| 34 | |
| 35 | bless { VALID => { map {$_, 1} |
| 36 | qw( bsize ffactor nelem cachesize hash lorder) |
| 37 | }, |
| 38 | GOT => {} |
| 39 | }, $pkg ; |
| 40 | } |
| 41 | |
| 42 | |
| 43 | sub FETCH |
| 44 | { |
| 45 | my $self = shift ; |
| 46 | my $key = shift ; |
| 47 | |
| 48 | return $self->{GOT}{$key} if exists $self->{VALID}{$key} ; |
| 49 | |
| 50 | my $pkg = ref $self ; |
| 51 | croak "${pkg}::FETCH - Unknown element '$key'" ; |
| 52 | } |
| 53 | |
| 54 | |
| 55 | sub STORE |
| 56 | { |
| 57 | my $self = shift ; |
| 58 | my $key = shift ; |
| 59 | my $value = shift ; |
| 60 | |
| 61 | if ( exists $self->{VALID}{$key} ) |
| 62 | { |
| 63 | $self->{GOT}{$key} = $value ; |
| 64 | return ; |
| 65 | } |
| 66 | |
| 67 | my $pkg = ref $self ; |
| 68 | croak "${pkg}::STORE - Unknown element '$key'" ; |
| 69 | } |
| 70 | |
| 71 | sub DELETE |
| 72 | { |
| 73 | my $self = shift ; |
| 74 | my $key = shift ; |
| 75 | |
| 76 | if ( exists $self->{VALID}{$key} ) |
| 77 | { |
| 78 | delete $self->{GOT}{$key} ; |
| 79 | return ; |
| 80 | } |
| 81 | |
| 82 | my $pkg = ref $self ; |
| 83 | croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ; |
| 84 | } |
| 85 | |
| 86 | sub EXISTS |
| 87 | { |
| 88 | my $self = shift ; |
| 89 | my $key = shift ; |
| 90 | |
| 91 | exists $self->{VALID}{$key} ; |
| 92 | } |
| 93 | |
| 94 | sub NotHere |
| 95 | { |
| 96 | my $self = shift ; |
| 97 | my $method = shift ; |
| 98 | |
| 99 | croak ref($self) . " does not define the method ${method}" ; |
| 100 | } |
| 101 | |
| 102 | sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") } |
| 103 | sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") } |
| 104 | sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") } |
| 105 | |
| 106 | package DB_File::RECNOINFO ; |
| 107 | |
| 108 | use warnings; |
| 109 | use strict ; |
| 110 | |
| 111 | @DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ; |
| 112 | |
| 113 | sub TIEHASH |
| 114 | { |
| 115 | my $pkg = shift ; |
| 116 | |
| 117 | bless { VALID => { map {$_, 1} |
| 118 | qw( bval cachesize psize flags lorder reclen bfname ) |
| 119 | }, |
| 120 | GOT => {}, |
| 121 | }, $pkg ; |
| 122 | } |
| 123 | |
| 124 | package DB_File::BTREEINFO ; |
| 125 | |
| 126 | use warnings; |
| 127 | use strict ; |
| 128 | |
| 129 | @DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ; |
| 130 | |
| 131 | sub TIEHASH |
| 132 | { |
| 133 | my $pkg = shift ; |
| 134 | |
| 135 | bless { VALID => { map {$_, 1} |
| 136 | qw( flags cachesize maxkeypage minkeypage psize |
| 137 | compare prefix lorder ) |
| 138 | }, |
| 139 | GOT => {}, |
| 140 | }, $pkg ; |
| 141 | } |
| 142 | |
| 143 | |
| 144 | package DB_File ; |
| 145 | |
| 146 | use warnings; |
| 147 | use strict; |
| 148 | use vars qw($VERSION @ISA @EXPORT $AUTOLOAD $DB_BTREE $DB_HASH $DB_RECNO |
| 149 | $db_version $use_XSLoader |
| 150 | ) ; |
| 151 | use Carp; |
| 152 | |
| 153 | |
| 154 | $VERSION = "1.75" ; |
| 155 | |
| 156 | #typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; |
| 157 | $DB_BTREE = new DB_File::BTREEINFO ; |
| 158 | $DB_HASH = new DB_File::HASHINFO ; |
| 159 | $DB_RECNO = new DB_File::RECNOINFO ; |
| 160 | |
| 161 | require Tie::Hash; |
| 162 | require Exporter; |
| 163 | use AutoLoader; |
| 164 | BEGIN { |
| 165 | $use_XSLoader = 1 ; |
| 166 | eval { require XSLoader } ; |
| 167 | |
| 168 | if ($@) { |
| 169 | $use_XSLoader = 0 ; |
| 170 | require DynaLoader; |
| 171 | @ISA = qw(DynaLoader); |
| 172 | } |
| 173 | } |
| 174 | |
| 175 | push @ISA, qw(Tie::Hash Exporter); |
| 176 | @EXPORT = qw( |
| 177 | $DB_BTREE $DB_HASH $DB_RECNO |
| 178 | |
| 179 | BTREEMAGIC |
| 180 | BTREEVERSION |
| 181 | DB_LOCK |
| 182 | DB_SHMEM |
| 183 | DB_TXN |
| 184 | HASHMAGIC |
| 185 | HASHVERSION |
| 186 | MAX_PAGE_NUMBER |
| 187 | MAX_PAGE_OFFSET |
| 188 | MAX_REC_NUMBER |
| 189 | RET_ERROR |
| 190 | RET_SPECIAL |
| 191 | RET_SUCCESS |
| 192 | R_CURSOR |
| 193 | R_DUP |
| 194 | R_FIRST |
| 195 | R_FIXEDLEN |
| 196 | R_IAFTER |
| 197 | R_IBEFORE |
| 198 | R_LAST |
| 199 | R_NEXT |
| 200 | R_NOKEY |
| 201 | R_NOOVERWRITE |
| 202 | R_PREV |
| 203 | R_RECNOSYNC |
| 204 | R_SETCURSOR |
| 205 | R_SNAPSHOT |
| 206 | __R_UNUSED |
| 207 | |
| 208 | ); |
| 209 | |
| 210 | sub AUTOLOAD { |
| 211 | my($constname); |
| 212 | ($constname = $AUTOLOAD) =~ s/.*:://; |
| 213 | my $val = constant($constname, @_ ? $_[0] : 0); |
| 214 | if ($! != 0) { |
| 215 | if ($! =~ /Invalid/ || $!{EINVAL}) { |
| 216 | $AutoLoader::AUTOLOAD = $AUTOLOAD; |
| 217 | goto &AutoLoader::AUTOLOAD; |
| 218 | } |
| 219 | else { |
| 220 | my($pack,$file,$line) = caller; |
| 221 | croak "Your vendor has not defined DB macro $constname, used at $file line $line. |
| 222 | "; |
| 223 | } |
| 224 | } |
| 225 | eval "sub $AUTOLOAD { $val }"; |
| 226 | goto &$AUTOLOAD; |
| 227 | } |
| 228 | |
| 229 | |
| 230 | eval { |
| 231 | # Make all Fcntl O_XXX constants available for importing |
| 232 | require Fcntl; |
| 233 | my @O = grep /^O_/, @Fcntl::EXPORT; |
| 234 | Fcntl->import(@O); # first we import what we want to export |
| 235 | push(@EXPORT, @O); |
| 236 | }; |
| 237 | |
| 238 | if ($use_XSLoader) |
| 239 | { XSLoader::load("DB_File", $VERSION)} |
| 240 | else |
| 241 | { bootstrap DB_File $VERSION } |
| 242 | |
| 243 | # Preloaded methods go here. Autoload methods go after __END__, and are |
| 244 | # processed by the autosplit program. |
| 245 | |
| 246 | sub tie_hash_or_array |
| 247 | { |
| 248 | my (@arg) = @_ ; |
| 249 | my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ; |
| 250 | |
| 251 | $arg[4] = tied %{ $arg[4] } |
| 252 | if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ; |
| 253 | |
| 254 | # make recno in Berkeley DB version 2 work like recno in version 1. |
| 255 | if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and |
| 256 | $arg[1] and ! -e $arg[1]) { |
| 257 | open(FH, ">$arg[1]") or return undef ; |
| 258 | close FH ; |
| 259 | chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ; |
| 260 | } |
| 261 | |
| 262 | DoTie_($tieHASH, @arg) ; |
| 263 | } |
| 264 | |
| 265 | sub TIEHASH |
| 266 | { |
| 267 | tie_hash_or_array(@_) ; |
| 268 | } |
| 269 | |
| 270 | sub TIEARRAY |
| 271 | { |
| 272 | tie_hash_or_array(@_) ; |
| 273 | } |
| 274 | |
| 275 | sub CLEAR |
| 276 | { |
| 277 | my $self = shift; |
| 278 | my $key = 0 ; |
| 279 | my $value = "" ; |
| 280 | my $status = $self->seq($key, $value, R_FIRST()); |
| 281 | my @keys; |
| 282 | |
| 283 | while ($status == 0) { |
| 284 | push @keys, $key; |
| 285 | $status = $self->seq($key, $value, R_NEXT()); |
| 286 | } |
| 287 | foreach $key (reverse @keys) { |
| 288 | my $s = $self->del($key); |
| 289 | } |
| 290 | } |
| 291 | |
| 292 | sub EXTEND { } |
| 293 | |
| 294 | sub STORESIZE |
| 295 | { |
| 296 | my $self = shift; |
| 297 | my $length = shift ; |
| 298 | my $current_length = $self->length() ; |
| 299 | |
| 300 | if ($length < $current_length) { |
| 301 | my $key ; |
| 302 | for ($key = $current_length - 1 ; $key >= $length ; -- $key) |
| 303 | { $self->del($key) } |
| 304 | } |
| 305 | elsif ($length > $current_length) { |
| 306 | $self->put($length-1, "") ; |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | sub find_dup |
| 311 | { |
| 312 | croak "Usage: \$db->find_dup(key,value)\n" |
| 313 | unless @_ == 3 ; |
| 314 | |
| 315 | my $db = shift ; |
| 316 | my ($origkey, $value_wanted) = @_ ; |
| 317 | my ($key, $value) = ($origkey, 0); |
| 318 | my ($status) = 0 ; |
| 319 | |
| 320 | for ($status = $db->seq($key, $value, R_CURSOR() ) ; |
| 321 | $status == 0 ; |
| 322 | $status = $db->seq($key, $value, R_NEXT() ) ) { |
| 323 | |
| 324 | return 0 if $key eq $origkey and $value eq $value_wanted ; |
| 325 | } |
| 326 | |
| 327 | return $status ; |
| 328 | } |
| 329 | |
| 330 | sub del_dup |
| 331 | { |
| 332 | croak "Usage: \$db->del_dup(key,value)\n" |
| 333 | unless @_ == 3 ; |
| 334 | |
| 335 | my $db = shift ; |
| 336 | my ($key, $value) = @_ ; |
| 337 | my ($status) = $db->find_dup($key, $value) ; |
| 338 | return $status if $status != 0 ; |
| 339 | |
| 340 | $status = $db->del($key, R_CURSOR() ) ; |
| 341 | return $status ; |
| 342 | } |
| 343 | |
| 344 | sub get_dup |
| 345 | { |
| 346 | croak "Usage: \$db->get_dup(key [,flag])\n" |
| 347 | unless @_ == 2 or @_ == 3 ; |
| 348 | |
| 349 | my $db = shift ; |
| 350 | my $key = shift ; |
| 351 | my $flag = shift ; |
| 352 | my $value = 0 ; |
| 353 | my $origkey = $key ; |
| 354 | my $wantarray = wantarray ; |
| 355 | my %values = () ; |
| 356 | my @values = () ; |
| 357 | my $counter = 0 ; |
| 358 | my $status = 0 ; |
| 359 | |
| 360 | # iterate through the database until either EOF ($status == 0) |
| 361 | # or a different key is encountered ($key ne $origkey). |
| 362 | for ($status = $db->seq($key, $value, R_CURSOR()) ; |
| 363 | $status == 0 and $key eq $origkey ; |
| 364 | $status = $db->seq($key, $value, R_NEXT()) ) { |
| 365 | |
| 366 | # save the value or count number of matches |
| 367 | if ($wantarray) { |
| 368 | if ($flag) |
| 369 | { ++ $values{$value} } |
| 370 | else |
| 371 | { push (@values, $value) } |
| 372 | } |
| 373 | else |
| 374 | { ++ $counter } |
| 375 | |
| 376 | } |
| 377 | |
| 378 | return ($wantarray ? ($flag ? %values : @values) : $counter) ; |
| 379 | } |
| 380 | |
| 381 | |
| 382 | 1; |
| 383 | __END__ |
| 384 | |
| 385 | =head1 NAME |
| 386 | |
| 387 | DB_File - Perl5 access to Berkeley DB version 1.x |
| 388 | |
| 389 | =head1 SYNOPSIS |
| 390 | |
| 391 | use DB_File ; |
| 392 | |
| 393 | [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ; |
| 394 | [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ; |
| 395 | [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ; |
| 396 | |
| 397 | $status = $X->del($key [, $flags]) ; |
| 398 | $status = $X->put($key, $value [, $flags]) ; |
| 399 | $status = $X->get($key, $value [, $flags]) ; |
| 400 | $status = $X->seq($key, $value, $flags) ; |
| 401 | $status = $X->sync([$flags]) ; |
| 402 | $status = $X->fd ; |
| 403 | |
| 404 | # BTREE only |
| 405 | $count = $X->get_dup($key) ; |
| 406 | @list = $X->get_dup($key) ; |
| 407 | %list = $X->get_dup($key, 1) ; |
| 408 | $status = $X->find_dup($key, $value) ; |
| 409 | $status = $X->del_dup($key, $value) ; |
| 410 | |
| 411 | # RECNO only |
| 412 | $a = $X->length; |
| 413 | $a = $X->pop ; |
| 414 | $X->push(list); |
| 415 | $a = $X->shift; |
| 416 | $X->unshift(list); |
| 417 | |
| 418 | # DBM Filters |
| 419 | $old_filter = $db->filter_store_key ( sub { ... } ) ; |
| 420 | $old_filter = $db->filter_store_value( sub { ... } ) ; |
| 421 | $old_filter = $db->filter_fetch_key ( sub { ... } ) ; |
| 422 | $old_filter = $db->filter_fetch_value( sub { ... } ) ; |
| 423 | |
| 424 | untie %hash ; |
| 425 | untie @array ; |
| 426 | |
| 427 | =head1 DESCRIPTION |
| 428 | |
| 429 | B<DB_File> is a module which allows Perl programs to make use of the |
| 430 | facilities provided by Berkeley DB version 1.x (if you have a newer |
| 431 | version of DB, see L<Using DB_File with Berkeley DB version 2 or 3>). |
| 432 | It is assumed that you have a copy of the Berkeley DB manual pages at |
| 433 | hand when reading this documentation. The interface defined here |
| 434 | mirrors the Berkeley DB interface closely. |
| 435 | |
| 436 | Berkeley DB is a C library which provides a consistent interface to a |
| 437 | number of database formats. B<DB_File> provides an interface to all |
| 438 | three of the database types currently supported by Berkeley DB. |
| 439 | |
| 440 | The file types are: |
| 441 | |
| 442 | =over 5 |
| 443 | |
| 444 | =item B<DB_HASH> |
| 445 | |
| 446 | This database type allows arbitrary key/value pairs to be stored in data |
| 447 | files. This is equivalent to the functionality provided by other |
| 448 | hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though, |
| 449 | the files created using DB_HASH are not compatible with any of the |
| 450 | other packages mentioned. |
| 451 | |
| 452 | A default hashing algorithm, which will be adequate for most |
| 453 | applications, is built into Berkeley DB. If you do need to use your own |
| 454 | hashing algorithm it is possible to write your own in Perl and have |
| 455 | B<DB_File> use it instead. |
| 456 | |
| 457 | =item B<DB_BTREE> |
| 458 | |
| 459 | The btree format allows arbitrary key/value pairs to be stored in a |
| 460 | sorted, balanced binary tree. |
| 461 | |
| 462 | As with the DB_HASH format, it is possible to provide a user defined |
| 463 | Perl routine to perform the comparison of keys. By default, though, the |
| 464 | keys are stored in lexical order. |
| 465 | |
| 466 | =item B<DB_RECNO> |
| 467 | |
| 468 | DB_RECNO allows both fixed-length and variable-length flat text files |
| 469 | to be manipulated using the same key/value pair interface as in DB_HASH |
| 470 | and DB_BTREE. In this case the key will consist of a record (line) |
| 471 | number. |
| 472 | |
| 473 | =back |
| 474 | |
| 475 | =head2 Using DB_File with Berkeley DB version 2 or 3 |
| 476 | |
| 477 | Although B<DB_File> is intended to be used with Berkeley DB version 1, |
| 478 | it can also be used with version 2.or 3 In this case the interface is |
| 479 | limited to the functionality provided by Berkeley DB 1.x. Anywhere the |
| 480 | version 2 or 3 interface differs, B<DB_File> arranges for it to work |
| 481 | like version 1. This feature allows B<DB_File> scripts that were built |
| 482 | with version 1 to be migrated to version 2 or 3 without any changes. |
| 483 | |
| 484 | If you want to make use of the new features available in Berkeley DB |
| 485 | 2.x or greater, use the Perl module B<BerkeleyDB> instead. |
| 486 | |
| 487 | B<Note:> The database file format has changed in both Berkeley DB |
| 488 | version 2 and 3. If you cannot recreate your databases, you must dump |
| 489 | any existing databases with the C<db_dump185> utility that comes with |
| 490 | Berkeley DB. |
| 491 | Once you have rebuilt DB_File to use Berkeley DB version 2 or 3, your |
| 492 | databases can be recreated using C<db_load>. Refer to the Berkeley DB |
| 493 | documentation for further details. |
| 494 | |
| 495 | Please read L<"COPYRIGHT"> before using version 2.x or 3.x of Berkeley |
| 496 | DB with DB_File. |
| 497 | |
| 498 | =head2 Interface to Berkeley DB |
| 499 | |
| 500 | B<DB_File> allows access to Berkeley DB files using the tie() mechanism |
| 501 | in Perl 5 (for full details, see L<perlfunc/tie()>). This facility |
| 502 | allows B<DB_File> to access Berkeley DB files using either an |
| 503 | associative array (for DB_HASH & DB_BTREE file types) or an ordinary |
| 504 | array (for the DB_RECNO file type). |
| 505 | |
| 506 | In addition to the tie() interface, it is also possible to access most |
| 507 | of the functions provided in the Berkeley DB API directly. |
| 508 | See L<THE API INTERFACE>. |
| 509 | |
| 510 | =head2 Opening a Berkeley DB Database File |
| 511 | |
| 512 | Berkeley DB uses the function dbopen() to open or create a database. |
| 513 | Here is the C prototype for dbopen(): |
| 514 | |
| 515 | DB* |
| 516 | dbopen (const char * file, int flags, int mode, |
| 517 | DBTYPE type, const void * openinfo) |
| 518 | |
| 519 | The parameter C<type> is an enumeration which specifies which of the 3 |
| 520 | interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used. |
| 521 | Depending on which of these is actually chosen, the final parameter, |
| 522 | I<openinfo> points to a data structure which allows tailoring of the |
| 523 | specific interface method. |
| 524 | |
| 525 | This interface is handled slightly differently in B<DB_File>. Here is |
| 526 | an equivalent call using B<DB_File>: |
| 527 | |
| 528 | tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ; |
| 529 | |
| 530 | The C<filename>, C<flags> and C<mode> parameters are the direct |
| 531 | equivalent of their dbopen() counterparts. The final parameter $DB_HASH |
| 532 | performs the function of both the C<type> and C<openinfo> parameters in |
| 533 | dbopen(). |
| 534 | |
| 535 | In the example above $DB_HASH is actually a pre-defined reference to a |
| 536 | hash object. B<DB_File> has three of these pre-defined references. |
| 537 | Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO. |
| 538 | |
| 539 | The keys allowed in each of these pre-defined references is limited to |
| 540 | the names used in the equivalent C structure. So, for example, the |
| 541 | $DB_HASH reference will only allow keys called C<bsize>, C<cachesize>, |
| 542 | C<ffactor>, C<hash>, C<lorder> and C<nelem>. |
| 543 | |
| 544 | To change one of these elements, just assign to it like this: |
| 545 | |
| 546 | $DB_HASH->{'cachesize'} = 10000 ; |
| 547 | |
| 548 | The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are |
| 549 | usually adequate for most applications. If you do need to create extra |
| 550 | instances of these objects, constructors are available for each file |
| 551 | type. |
| 552 | |
| 553 | Here are examples of the constructors and the valid options available |
| 554 | for DB_HASH, DB_BTREE and DB_RECNO respectively. |
| 555 | |
| 556 | $a = new DB_File::HASHINFO ; |
| 557 | $a->{'bsize'} ; |
| 558 | $a->{'cachesize'} ; |
| 559 | $a->{'ffactor'}; |
| 560 | $a->{'hash'} ; |
| 561 | $a->{'lorder'} ; |
| 562 | $a->{'nelem'} ; |
| 563 | |
| 564 | $b = new DB_File::BTREEINFO ; |
| 565 | $b->{'flags'} ; |
| 566 | $b->{'cachesize'} ; |
| 567 | $b->{'maxkeypage'} ; |
| 568 | $b->{'minkeypage'} ; |
| 569 | $b->{'psize'} ; |
| 570 | $b->{'compare'} ; |
| 571 | $b->{'prefix'} ; |
| 572 | $b->{'lorder'} ; |
| 573 | |
| 574 | $c = new DB_File::RECNOINFO ; |
| 575 | $c->{'bval'} ; |
| 576 | $c->{'cachesize'} ; |
| 577 | $c->{'psize'} ; |
| 578 | $c->{'flags'} ; |
| 579 | $c->{'lorder'} ; |
| 580 | $c->{'reclen'} ; |
| 581 | $c->{'bfname'} ; |
| 582 | |
| 583 | The values stored in the hashes above are mostly the direct equivalent |
| 584 | of their C counterpart. Like their C counterparts, all are set to a |
| 585 | default values - that means you don't have to set I<all> of the |
| 586 | values when you only want to change one. Here is an example: |
| 587 | |
| 588 | $a = new DB_File::HASHINFO ; |
| 589 | $a->{'cachesize'} = 12345 ; |
| 590 | tie %y, 'DB_File', "filename", $flags, 0777, $a ; |
| 591 | |
| 592 | A few of the options need extra discussion here. When used, the C |
| 593 | equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers |
| 594 | to C functions. In B<DB_File> these keys are used to store references |
| 595 | to Perl subs. Below are templates for each of the subs: |
| 596 | |
| 597 | sub hash |
| 598 | { |
| 599 | my ($data) = @_ ; |
| 600 | ... |
| 601 | # return the hash value for $data |
| 602 | return $hash ; |
| 603 | } |
| 604 | |
| 605 | sub compare |
| 606 | { |
| 607 | my ($key, $key2) = @_ ; |
| 608 | ... |
| 609 | # return 0 if $key1 eq $key2 |
| 610 | # -1 if $key1 lt $key2 |
| 611 | # 1 if $key1 gt $key2 |
| 612 | return (-1 , 0 or 1) ; |
| 613 | } |
| 614 | |
| 615 | sub prefix |
| 616 | { |
| 617 | my ($key, $key2) = @_ ; |
| 618 | ... |
| 619 | # return number of bytes of $key2 which are |
| 620 | # necessary to determine that it is greater than $key1 |
| 621 | return $bytes ; |
| 622 | } |
| 623 | |
| 624 | See L<Changing the BTREE sort order> for an example of using the |
| 625 | C<compare> template. |
| 626 | |
| 627 | If you are using the DB_RECNO interface and you intend making use of |
| 628 | C<bval>, you should check out L<The 'bval' Option>. |
| 629 | |
| 630 | =head2 Default Parameters |
| 631 | |
| 632 | It is possible to omit some or all of the final 4 parameters in the |
| 633 | call to C<tie> and let them take default values. As DB_HASH is the most |
| 634 | common file format used, the call: |
| 635 | |
| 636 | tie %A, "DB_File", "filename" ; |
| 637 | |
| 638 | is equivalent to: |
| 639 | |
| 640 | tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ; |
| 641 | |
| 642 | It is also possible to omit the filename parameter as well, so the |
| 643 | call: |
| 644 | |
| 645 | tie %A, "DB_File" ; |
| 646 | |
| 647 | is equivalent to: |
| 648 | |
| 649 | tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ; |
| 650 | |
| 651 | See L<In Memory Databases> for a discussion on the use of C<undef> |
| 652 | in place of a filename. |
| 653 | |
| 654 | =head2 In Memory Databases |
| 655 | |
| 656 | Berkeley DB allows the creation of in-memory databases by using NULL |
| 657 | (that is, a C<(char *)0> in C) in place of the filename. B<DB_File> |
| 658 | uses C<undef> instead of NULL to provide this functionality. |
| 659 | |
| 660 | =head1 DB_HASH |
| 661 | |
| 662 | The DB_HASH file format is probably the most commonly used of the three |
| 663 | file formats that B<DB_File> supports. It is also very straightforward |
| 664 | to use. |
| 665 | |
| 666 | =head2 A Simple Example |
| 667 | |
| 668 | This example shows how to create a database, add key/value pairs to the |
| 669 | database, delete keys/value pairs and finally how to enumerate the |
| 670 | contents of the database. |
| 671 | |
| 672 | use warnings ; |
| 673 | use strict ; |
| 674 | use DB_File ; |
| 675 | use vars qw( %h $k $v ) ; |
| 676 | |
| 677 | unlink "fruit" ; |
| 678 | tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0640, $DB_HASH |
| 679 | or die "Cannot open file 'fruit': $!\n"; |
| 680 | |
| 681 | # Add a few key/value pairs to the file |
| 682 | $h{"apple"} = "red" ; |
| 683 | $h{"orange"} = "orange" ; |
| 684 | $h{"banana"} = "yellow" ; |
| 685 | $h{"tomato"} = "red" ; |
| 686 | |
| 687 | # Check for existence of a key |
| 688 | print "Banana Exists\n\n" if $h{"banana"} ; |
| 689 | |
| 690 | # Delete a key/value pair. |
| 691 | delete $h{"apple"} ; |
| 692 | |
| 693 | # print the contents of the file |
| 694 | while (($k, $v) = each %h) |
| 695 | { print "$k -> $v\n" } |
| 696 | |
| 697 | untie %h ; |
| 698 | |
| 699 | here is the output: |
| 700 | |
| 701 | Banana Exists |
| 702 | |
| 703 | orange -> orange |
| 704 | tomato -> red |
| 705 | banana -> yellow |
| 706 | |
| 707 | Note that the like ordinary associative arrays, the order of the keys |
| 708 | retrieved is in an apparently random order. |
| 709 | |
| 710 | =head1 DB_BTREE |
| 711 | |
| 712 | The DB_BTREE format is useful when you want to store data in a given |
| 713 | order. By default the keys will be stored in lexical order, but as you |
| 714 | will see from the example shown in the next section, it is very easy to |
| 715 | define your own sorting function. |
| 716 | |
| 717 | =head2 Changing the BTREE sort order |
| 718 | |
| 719 | This script shows how to override the default sorting algorithm that |
| 720 | BTREE uses. Instead of using the normal lexical ordering, a case |
| 721 | insensitive compare function will be used. |
| 722 | |
| 723 | use warnings ; |
| 724 | use strict ; |
| 725 | use DB_File ; |
| 726 | |
| 727 | my %h ; |
| 728 | |
| 729 | sub Compare |
| 730 | { |
| 731 | my ($key1, $key2) = @_ ; |
| 732 | "\L$key1" cmp "\L$key2" ; |
| 733 | } |
| 734 | |
| 735 | # specify the Perl sub that will do the comparison |
| 736 | $DB_BTREE->{'compare'} = \&Compare ; |
| 737 | |
| 738 | unlink "tree" ; |
| 739 | tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0640, $DB_BTREE |
| 740 | or die "Cannot open file 'tree': $!\n" ; |
| 741 | |
| 742 | # Add a key/value pair to the file |
| 743 | $h{'Wall'} = 'Larry' ; |
| 744 | $h{'Smith'} = 'John' ; |
| 745 | $h{'mouse'} = 'mickey' ; |
| 746 | $h{'duck'} = 'donald' ; |
| 747 | |
| 748 | # Delete |
| 749 | delete $h{"duck"} ; |
| 750 | |
| 751 | # Cycle through the keys printing them in order. |
| 752 | # Note it is not necessary to sort the keys as |
| 753 | # the btree will have kept them in order automatically. |
| 754 | foreach (keys %h) |
| 755 | { print "$_\n" } |
| 756 | |
| 757 | untie %h ; |
| 758 | |
| 759 | Here is the output from the code above. |
| 760 | |
| 761 | mouse |
| 762 | Smith |
| 763 | Wall |
| 764 | |
| 765 | There are a few point to bear in mind if you want to change the |
| 766 | ordering in a BTREE database: |
| 767 | |
| 768 | =over 5 |
| 769 | |
| 770 | =item 1. |
| 771 | |
| 772 | The new compare function must be specified when you create the database. |
| 773 | |
| 774 | =item 2. |
| 775 | |
| 776 | You cannot change the ordering once the database has been created. Thus |
| 777 | you must use the same compare function every time you access the |
| 778 | database. |
| 779 | |
| 780 | =back |
| 781 | |
| 782 | =head2 Handling Duplicate Keys |
| 783 | |
| 784 | The BTREE file type optionally allows a single key to be associated |
| 785 | with an arbitrary number of values. This option is enabled by setting |
| 786 | the flags element of C<$DB_BTREE> to R_DUP when creating the database. |
| 787 | |
| 788 | There are some difficulties in using the tied hash interface if you |
| 789 | want to manipulate a BTREE database with duplicate keys. Consider this |
| 790 | code: |
| 791 | |
| 792 | use warnings ; |
| 793 | use strict ; |
| 794 | use DB_File ; |
| 795 | |
| 796 | use vars qw($filename %h ) ; |
| 797 | |
| 798 | $filename = "tree" ; |
| 799 | unlink $filename ; |
| 800 | |
| 801 | # Enable duplicate records |
| 802 | $DB_BTREE->{'flags'} = R_DUP ; |
| 803 | |
| 804 | tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE |
| 805 | or die "Cannot open $filename: $!\n"; |
| 806 | |
| 807 | # Add some key/value pairs to the file |
| 808 | $h{'Wall'} = 'Larry' ; |
| 809 | $h{'Wall'} = 'Brick' ; # Note the duplicate key |
| 810 | $h{'Wall'} = 'Brick' ; # Note the duplicate key and value |
| 811 | $h{'Smith'} = 'John' ; |
| 812 | $h{'mouse'} = 'mickey' ; |
| 813 | |
| 814 | # iterate through the associative array |
| 815 | # and print each key/value pair. |
| 816 | foreach (sort keys %h) |
| 817 | { print "$_ -> $h{$_}\n" } |
| 818 | |
| 819 | untie %h ; |
| 820 | |
| 821 | Here is the output: |
| 822 | |
| 823 | Smith -> John |
| 824 | Wall -> Larry |
| 825 | Wall -> Larry |
| 826 | Wall -> Larry |
| 827 | mouse -> mickey |
| 828 | |
| 829 | As you can see 3 records have been successfully created with key C<Wall> |
| 830 | - the only thing is, when they are retrieved from the database they |
| 831 | I<seem> to have the same value, namely C<Larry>. The problem is caused |
| 832 | by the way that the associative array interface works. Basically, when |
| 833 | the associative array interface is used to fetch the value associated |
| 834 | with a given key, it will only ever retrieve the first value. |
| 835 | |
| 836 | Although it may not be immediately obvious from the code above, the |
| 837 | associative array interface can be used to write values with duplicate |
| 838 | keys, but it cannot be used to read them back from the database. |
| 839 | |
| 840 | The way to get around this problem is to use the Berkeley DB API method |
| 841 | called C<seq>. This method allows sequential access to key/value |
| 842 | pairs. See L<THE API INTERFACE> for details of both the C<seq> method |
| 843 | and the API in general. |
| 844 | |
| 845 | Here is the script above rewritten using the C<seq> API method. |
| 846 | |
| 847 | use warnings ; |
| 848 | use strict ; |
| 849 | use DB_File ; |
| 850 | |
| 851 | use vars qw($filename $x %h $status $key $value) ; |
| 852 | |
| 853 | $filename = "tree" ; |
| 854 | unlink $filename ; |
| 855 | |
| 856 | # Enable duplicate records |
| 857 | $DB_BTREE->{'flags'} = R_DUP ; |
| 858 | |
| 859 | $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE |
| 860 | or die "Cannot open $filename: $!\n"; |
| 861 | |
| 862 | # Add some key/value pairs to the file |
| 863 | $h{'Wall'} = 'Larry' ; |
| 864 | $h{'Wall'} = 'Brick' ; # Note the duplicate key |
| 865 | $h{'Wall'} = 'Brick' ; # Note the duplicate key and value |
| 866 | $h{'Smith'} = 'John' ; |
| 867 | $h{'mouse'} = 'mickey' ; |
| 868 | |
| 869 | # iterate through the btree using seq |
| 870 | # and print each key/value pair. |
| 871 | $key = $value = 0 ; |
| 872 | for ($status = $x->seq($key, $value, R_FIRST) ; |
| 873 | $status == 0 ; |
| 874 | $status = $x->seq($key, $value, R_NEXT) ) |
| 875 | { print "$key -> $value\n" } |
| 876 | |
| 877 | undef $x ; |
| 878 | untie %h ; |
| 879 | |
| 880 | that prints: |
| 881 | |
| 882 | Smith -> John |
| 883 | Wall -> Brick |
| 884 | Wall -> Brick |
| 885 | Wall -> Larry |
| 886 | mouse -> mickey |
| 887 | |
| 888 | This time we have got all the key/value pairs, including the multiple |
| 889 | values associated with the key C<Wall>. |
| 890 | |
| 891 | To make life easier when dealing with duplicate keys, B<DB_File> comes with |
| 892 | a few utility methods. |
| 893 | |
| 894 | =head2 The get_dup() Method |
| 895 | |
| 896 | The C<get_dup> method assists in |
| 897 | reading duplicate values from BTREE databases. The method can take the |
| 898 | following forms: |
| 899 | |
| 900 | $count = $x->get_dup($key) ; |
| 901 | @list = $x->get_dup($key) ; |
| 902 | %list = $x->get_dup($key, 1) ; |
| 903 | |
| 904 | In a scalar context the method returns the number of values associated |
| 905 | with the key, C<$key>. |
| 906 | |
| 907 | In list context, it returns all the values which match C<$key>. Note |
| 908 | that the values will be returned in an apparently random order. |
| 909 | |
| 910 | In list context, if the second parameter is present and evaluates |
| 911 | TRUE, the method returns an associative array. The keys of the |
| 912 | associative array correspond to the values that matched in the BTREE |
| 913 | and the values of the array are a count of the number of times that |
| 914 | particular value occurred in the BTREE. |
| 915 | |
| 916 | So assuming the database created above, we can use C<get_dup> like |
| 917 | this: |
| 918 | |
| 919 | use warnings ; |
| 920 | use strict ; |
| 921 | use DB_File ; |
| 922 | |
| 923 | use vars qw($filename $x %h ) ; |
| 924 | |
| 925 | $filename = "tree" ; |
| 926 | |
| 927 | # Enable duplicate records |
| 928 | $DB_BTREE->{'flags'} = R_DUP ; |
| 929 | |
| 930 | $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE |
| 931 | or die "Cannot open $filename: $!\n"; |
| 932 | |
| 933 | my $cnt = $x->get_dup("Wall") ; |
| 934 | print "Wall occurred $cnt times\n" ; |
| 935 | |
| 936 | my %hash = $x->get_dup("Wall", 1) ; |
| 937 | print "Larry is there\n" if $hash{'Larry'} ; |
| 938 | print "There are $hash{'Brick'} Brick Walls\n" ; |
| 939 | |
| 940 | my @list = sort $x->get_dup("Wall") ; |
| 941 | print "Wall => [@list]\n" ; |
| 942 | |
| 943 | @list = $x->get_dup("Smith") ; |
| 944 | print "Smith => [@list]\n" ; |
| 945 | |
| 946 | @list = $x->get_dup("Dog") ; |
| 947 | print "Dog => [@list]\n" ; |
| 948 | |
| 949 | |
| 950 | and it will print: |
| 951 | |
| 952 | Wall occurred 3 times |
| 953 | Larry is there |
| 954 | There are 2 Brick Walls |
| 955 | Wall => [Brick Brick Larry] |
| 956 | Smith => [John] |
| 957 | Dog => [] |
| 958 | |
| 959 | =head2 The find_dup() Method |
| 960 | |
| 961 | $status = $X->find_dup($key, $value) ; |
| 962 | |
| 963 | This method checks for the existence of a specific key/value pair. If the |
| 964 | pair exists, the cursor is left pointing to the pair and the method |
| 965 | returns 0. Otherwise the method returns a non-zero value. |
| 966 | |
| 967 | Assuming the database from the previous example: |
| 968 | |
| 969 | use warnings ; |
| 970 | use strict ; |
| 971 | use DB_File ; |
| 972 | |
| 973 | use vars qw($filename $x %h $found) ; |
| 974 | |
| 975 | my $filename = "tree" ; |
| 976 | |
| 977 | # Enable duplicate records |
| 978 | $DB_BTREE->{'flags'} = R_DUP ; |
| 979 | |
| 980 | $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE |
| 981 | or die "Cannot open $filename: $!\n"; |
| 982 | |
| 983 | $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ; |
| 984 | print "Larry Wall is $found there\n" ; |
| 985 | |
| 986 | $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ; |
| 987 | print "Harry Wall is $found there\n" ; |
| 988 | |
| 989 | undef $x ; |
| 990 | untie %h ; |
| 991 | |
| 992 | prints this |
| 993 | |
| 994 | Larry Wall is there |
| 995 | Harry Wall is not there |
| 996 | |
| 997 | |
| 998 | =head2 The del_dup() Method |
| 999 | |
| 1000 | $status = $X->del_dup($key, $value) ; |
| 1001 | |
| 1002 | This method deletes a specific key/value pair. It returns |
| 1003 | 0 if they exist and have been deleted successfully. |
| 1004 | Otherwise the method returns a non-zero value. |
| 1005 | |
| 1006 | Again assuming the existence of the C<tree> database |
| 1007 | |
| 1008 | use warnings ; |
| 1009 | use strict ; |
| 1010 | use DB_File ; |
| 1011 | |
| 1012 | use vars qw($filename $x %h $found) ; |
| 1013 | |
| 1014 | my $filename = "tree" ; |
| 1015 | |
| 1016 | # Enable duplicate records |
| 1017 | $DB_BTREE->{'flags'} = R_DUP ; |
| 1018 | |
| 1019 | $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE |
| 1020 | or die "Cannot open $filename: $!\n"; |
| 1021 | |
| 1022 | $x->del_dup("Wall", "Larry") ; |
| 1023 | |
| 1024 | $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ; |
| 1025 | print "Larry Wall is $found there\n" ; |
| 1026 | |
| 1027 | undef $x ; |
| 1028 | untie %h ; |
| 1029 | |
| 1030 | prints this |
| 1031 | |
| 1032 | Larry Wall is not there |
| 1033 | |
| 1034 | =head2 Matching Partial Keys |
| 1035 | |
| 1036 | The BTREE interface has a feature which allows partial keys to be |
| 1037 | matched. This functionality is I<only> available when the C<seq> method |
| 1038 | is used along with the R_CURSOR flag. |
| 1039 | |
| 1040 | $x->seq($key, $value, R_CURSOR) ; |
| 1041 | |
| 1042 | Here is the relevant quote from the dbopen man page where it defines |
| 1043 | the use of the R_CURSOR flag with seq: |
| 1044 | |
| 1045 | Note, for the DB_BTREE access method, the returned key is not |
| 1046 | necessarily an exact match for the specified key. The returned key |
| 1047 | is the smallest key greater than or equal to the specified key, |
| 1048 | permitting partial key matches and range searches. |
| 1049 | |
| 1050 | In the example script below, the C<match> sub uses this feature to find |
| 1051 | and print the first matching key/value pair given a partial key. |
| 1052 | |
| 1053 | use warnings ; |
| 1054 | use strict ; |
| 1055 | use DB_File ; |
| 1056 | use Fcntl ; |
| 1057 | |
| 1058 | use vars qw($filename $x %h $st $key $value) ; |
| 1059 | |
| 1060 | sub match |
| 1061 | { |
| 1062 | my $key = shift ; |
| 1063 | my $value = 0; |
| 1064 | my $orig_key = $key ; |
| 1065 | $x->seq($key, $value, R_CURSOR) ; |
| 1066 | print "$orig_key\t-> $key\t-> $value\n" ; |
| 1067 | } |
| 1068 | |
| 1069 | $filename = "tree" ; |
| 1070 | unlink $filename ; |
| 1071 | |
| 1072 | $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_BTREE |
| 1073 | or die "Cannot open $filename: $!\n"; |
| 1074 | |
| 1075 | # Add some key/value pairs to the file |
| 1076 | $h{'mouse'} = 'mickey' ; |
| 1077 | $h{'Wall'} = 'Larry' ; |
| 1078 | $h{'Walls'} = 'Brick' ; |
| 1079 | $h{'Smith'} = 'John' ; |
| 1080 | |
| 1081 | |
| 1082 | $key = $value = 0 ; |
| 1083 | print "IN ORDER\n" ; |
| 1084 | for ($st = $x->seq($key, $value, R_FIRST) ; |
| 1085 | $st == 0 ; |
| 1086 | $st = $x->seq($key, $value, R_NEXT) ) |
| 1087 | |
| 1088 | { print "$key -> $value\n" } |
| 1089 | |
| 1090 | print "\nPARTIAL MATCH\n" ; |
| 1091 | |
| 1092 | match "Wa" ; |
| 1093 | match "A" ; |
| 1094 | match "a" ; |
| 1095 | |
| 1096 | undef $x ; |
| 1097 | untie %h ; |
| 1098 | |
| 1099 | Here is the output: |
| 1100 | |
| 1101 | IN ORDER |
| 1102 | Smith -> John |
| 1103 | Wall -> Larry |
| 1104 | Walls -> Brick |
| 1105 | mouse -> mickey |
| 1106 | |
| 1107 | PARTIAL MATCH |
| 1108 | Wa -> Wall -> Larry |
| 1109 | A -> Smith -> John |
| 1110 | a -> mouse -> mickey |
| 1111 | |
| 1112 | =head1 DB_RECNO |
| 1113 | |
| 1114 | DB_RECNO provides an interface to flat text files. Both variable and |
| 1115 | fixed length records are supported. |
| 1116 | |
| 1117 | In order to make RECNO more compatible with Perl, the array offset for |
| 1118 | all RECNO arrays begins at 0 rather than 1 as in Berkeley DB. |
| 1119 | |
| 1120 | As with normal Perl arrays, a RECNO array can be accessed using |
| 1121 | negative indexes. The index -1 refers to the last element of the array, |
| 1122 | -2 the second last, and so on. Attempting to access an element before |
| 1123 | the start of the array will raise a fatal run-time error. |
| 1124 | |
| 1125 | =head2 The 'bval' Option |
| 1126 | |
| 1127 | The operation of the bval option warrants some discussion. Here is the |
| 1128 | definition of bval from the Berkeley DB 1.85 recno manual page: |
| 1129 | |
| 1130 | The delimiting byte to be used to mark the end of a |
| 1131 | record for variable-length records, and the pad charac- |
| 1132 | ter for fixed-length records. If no value is speci- |
| 1133 | fied, newlines (``\n'') are used to mark the end of |
| 1134 | variable-length records and fixed-length records are |
| 1135 | padded with spaces. |
| 1136 | |
| 1137 | The second sentence is wrong. In actual fact bval will only default to |
| 1138 | C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL |
| 1139 | openinfo parameter is used at all, the value that happens to be in bval |
| 1140 | will be used. That means you always have to specify bval when making |
| 1141 | use of any of the options in the openinfo parameter. This documentation |
| 1142 | error will be fixed in the next release of Berkeley DB. |
| 1143 | |
| 1144 | That clarifies the situation with regards Berkeley DB itself. What |
| 1145 | about B<DB_File>? Well, the behavior defined in the quote above is |
| 1146 | quite useful, so B<DB_File> conforms to it. |
| 1147 | |
| 1148 | That means that you can specify other options (e.g. cachesize) and |
| 1149 | still have bval default to C<"\n"> for variable length records, and |
| 1150 | space for fixed length records. |
| 1151 | |
| 1152 | =head2 A Simple Example |
| 1153 | |
| 1154 | Here is a simple example that uses RECNO (if you are using a version |
| 1155 | of Perl earlier than 5.004_57 this example won't work -- see |
| 1156 | L<Extra RECNO Methods> for a workaround). |
| 1157 | |
| 1158 | use warnings ; |
| 1159 | use strict ; |
| 1160 | use DB_File ; |
| 1161 | |
| 1162 | my $filename = "text" ; |
| 1163 | unlink $filename ; |
| 1164 | |
| 1165 | my @h ; |
| 1166 | tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0640, $DB_RECNO |
| 1167 | or die "Cannot open file 'text': $!\n" ; |
| 1168 | |
| 1169 | # Add a few key/value pairs to the file |
| 1170 | $h[0] = "orange" ; |
| 1171 | $h[1] = "blue" ; |
| 1172 | $h[2] = "yellow" ; |
| 1173 | |
| 1174 | push @h, "green", "black" ; |
| 1175 | |
| 1176 | my $elements = scalar @h ; |
| 1177 | print "The array contains $elements entries\n" ; |
| 1178 | |
| 1179 | my $last = pop @h ; |
| 1180 | print "popped $last\n" ; |
| 1181 | |
| 1182 | unshift @h, "white" ; |
| 1183 | my $first = shift @h ; |
| 1184 | print "shifted $first\n" ; |
| 1185 | |
| 1186 | # Check for existence of a key |
| 1187 | print "Element 1 Exists with value $h[1]\n" if $h[1] ; |
| 1188 | |
| 1189 | # use a negative index |
| 1190 | print "The last element is $h[-1]\n" ; |
| 1191 | print "The 2nd last element is $h[-2]\n" ; |
| 1192 | |
| 1193 | untie @h ; |
| 1194 | |
| 1195 | Here is the output from the script: |
| 1196 | |
| 1197 | The array contains 5 entries |
| 1198 | popped black |
| 1199 | shifted white |
| 1200 | Element 1 Exists with value blue |
| 1201 | The last element is green |
| 1202 | The 2nd last element is yellow |
| 1203 | |
| 1204 | =head2 Extra RECNO Methods |
| 1205 | |
| 1206 | If you are using a version of Perl earlier than 5.004_57, the tied |
| 1207 | array interface is quite limited. In the example script above |
| 1208 | C<push>, C<pop>, C<shift>, C<unshift> |
| 1209 | or determining the array length will not work with a tied array. |
| 1210 | |
| 1211 | To make the interface more useful for older versions of Perl, a number |
| 1212 | of methods are supplied with B<DB_File> to simulate the missing array |
| 1213 | operations. All these methods are accessed via the object returned from |
| 1214 | the tie call. |
| 1215 | |
| 1216 | Here are the methods: |
| 1217 | |
| 1218 | =over 5 |
| 1219 | |
| 1220 | =item B<$X-E<gt>push(list) ;> |
| 1221 | |
| 1222 | Pushes the elements of C<list> to the end of the array. |
| 1223 | |
| 1224 | =item B<$value = $X-E<gt>pop ;> |
| 1225 | |
| 1226 | Removes and returns the last element of the array. |
| 1227 | |
| 1228 | =item B<$X-E<gt>shift> |
| 1229 | |
| 1230 | Removes and returns the first element of the array. |
| 1231 | |
| 1232 | =item B<$X-E<gt>unshift(list) ;> |
| 1233 | |
| 1234 | Pushes the elements of C<list> to the start of the array. |
| 1235 | |
| 1236 | =item B<$X-E<gt>length> |
| 1237 | |
| 1238 | Returns the number of elements in the array. |
| 1239 | |
| 1240 | =back |
| 1241 | |
| 1242 | =head2 Another Example |
| 1243 | |
| 1244 | Here is a more complete example that makes use of some of the methods |
| 1245 | described above. It also makes use of the API interface directly (see |
| 1246 | L<THE API INTERFACE>). |
| 1247 | |
| 1248 | use warnings ; |
| 1249 | use strict ; |
| 1250 | use vars qw(@h $H $file $i) ; |
| 1251 | use DB_File ; |
| 1252 | use Fcntl ; |
| 1253 | |
| 1254 | $file = "text" ; |
| 1255 | |
| 1256 | unlink $file ; |
| 1257 | |
| 1258 | $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0640, $DB_RECNO |
| 1259 | or die "Cannot open file $file: $!\n" ; |
| 1260 | |
| 1261 | # first create a text file to play with |
| 1262 | $h[0] = "zero" ; |
| 1263 | $h[1] = "one" ; |
| 1264 | $h[2] = "two" ; |
| 1265 | $h[3] = "three" ; |
| 1266 | $h[4] = "four" ; |
| 1267 | |
| 1268 | |
| 1269 | # Print the records in order. |
| 1270 | # |
| 1271 | # The length method is needed here because evaluating a tied |
| 1272 | # array in a scalar context does not return the number of |
| 1273 | # elements in the array. |
| 1274 | |
| 1275 | print "\nORIGINAL\n" ; |
| 1276 | foreach $i (0 .. $H->length - 1) { |
| 1277 | print "$i: $h[$i]\n" ; |
| 1278 | } |
| 1279 | |
| 1280 | # use the push & pop methods |
| 1281 | $a = $H->pop ; |
| 1282 | $H->push("last") ; |
| 1283 | print "\nThe last record was [$a]\n" ; |
| 1284 | |
| 1285 | # and the shift & unshift methods |
| 1286 | $a = $H->shift ; |
| 1287 | $H->unshift("first") ; |
| 1288 | print "The first record was [$a]\n" ; |
| 1289 | |
| 1290 | # Use the API to add a new record after record 2. |
| 1291 | $i = 2 ; |
| 1292 | $H->put($i, "Newbie", R_IAFTER) ; |
| 1293 | |
| 1294 | # and a new record before record 1. |
| 1295 | $i = 1 ; |
| 1296 | $H->put($i, "New One", R_IBEFORE) ; |
| 1297 | |
| 1298 | # delete record 3 |
| 1299 | $H->del(3) ; |
| 1300 | |
| 1301 | # now print the records in reverse order |
| 1302 | print "\nREVERSE\n" ; |
| 1303 | for ($i = $H->length - 1 ; $i >= 0 ; -- $i) |
| 1304 | { print "$i: $h[$i]\n" } |
| 1305 | |
| 1306 | # same again, but use the API functions instead |
| 1307 | print "\nREVERSE again\n" ; |
| 1308 | my ($s, $k, $v) = (0, 0, 0) ; |
| 1309 | for ($s = $H->seq($k, $v, R_LAST) ; |
| 1310 | $s == 0 ; |
| 1311 | $s = $H->seq($k, $v, R_PREV)) |
| 1312 | { print "$k: $v\n" } |
| 1313 | |
| 1314 | undef $H ; |
| 1315 | untie @h ; |
| 1316 | |
| 1317 | and this is what it outputs: |
| 1318 | |
| 1319 | ORIGINAL |
| 1320 | 0: zero |
| 1321 | 1: one |
| 1322 | 2: two |
| 1323 | 3: three |
| 1324 | 4: four |
| 1325 | |
| 1326 | The last record was [four] |
| 1327 | The first record was [zero] |
| 1328 | |
| 1329 | REVERSE |
| 1330 | 5: last |
| 1331 | 4: three |
| 1332 | 3: Newbie |
| 1333 | 2: one |
| 1334 | 1: New One |
| 1335 | 0: first |
| 1336 | |
| 1337 | REVERSE again |
| 1338 | 5: last |
| 1339 | 4: three |
| 1340 | 3: Newbie |
| 1341 | 2: one |
| 1342 | 1: New One |
| 1343 | 0: first |
| 1344 | |
| 1345 | Notes: |
| 1346 | |
| 1347 | =over 5 |
| 1348 | |
| 1349 | =item 1. |
| 1350 | |
| 1351 | Rather than iterating through the array, C<@h> like this: |
| 1352 | |
| 1353 | foreach $i (@h) |
| 1354 | |
| 1355 | it is necessary to use either this: |
| 1356 | |
| 1357 | foreach $i (0 .. $H->length - 1) |
| 1358 | |
| 1359 | or this: |
| 1360 | |
| 1361 | for ($a = $H->get($k, $v, R_FIRST) ; |
| 1362 | $a == 0 ; |
| 1363 | $a = $H->get($k, $v, R_NEXT) ) |
| 1364 | |
| 1365 | =item 2. |
| 1366 | |
| 1367 | Notice that both times the C<put> method was used the record index was |
| 1368 | specified using a variable, C<$i>, rather than the literal value |
| 1369 | itself. This is because C<put> will return the record number of the |
| 1370 | inserted line via that parameter. |
| 1371 | |
| 1372 | =back |
| 1373 | |
| 1374 | =head1 THE API INTERFACE |
| 1375 | |
| 1376 | As well as accessing Berkeley DB using a tied hash or array, it is also |
| 1377 | possible to make direct use of most of the API functions defined in the |
| 1378 | Berkeley DB documentation. |
| 1379 | |
| 1380 | To do this you need to store a copy of the object returned from the tie. |
| 1381 | |
| 1382 | $db = tie %hash, "DB_File", "filename" ; |
| 1383 | |
| 1384 | Once you have done that, you can access the Berkeley DB API functions |
| 1385 | as B<DB_File> methods directly like this: |
| 1386 | |
| 1387 | $db->put($key, $value, R_NOOVERWRITE) ; |
| 1388 | |
| 1389 | B<Important:> If you have saved a copy of the object returned from |
| 1390 | C<tie>, the underlying database file will I<not> be closed until both |
| 1391 | the tied variable is untied and all copies of the saved object are |
| 1392 | destroyed. |
| 1393 | |
| 1394 | use DB_File ; |
| 1395 | $db = tie %hash, "DB_File", "filename" |
| 1396 | or die "Cannot tie filename: $!" ; |
| 1397 | ... |
| 1398 | undef $db ; |
| 1399 | untie %hash ; |
| 1400 | |
| 1401 | See L<The untie() Gotcha> for more details. |
| 1402 | |
| 1403 | All the functions defined in L<dbopen> are available except for |
| 1404 | close() and dbopen() itself. The B<DB_File> method interface to the |
| 1405 | supported functions have been implemented to mirror the way Berkeley DB |
| 1406 | works whenever possible. In particular note that: |
| 1407 | |
| 1408 | =over 5 |
| 1409 | |
| 1410 | =item * |
| 1411 | |
| 1412 | The methods return a status value. All return 0 on success. |
| 1413 | All return -1 to signify an error and set C<$!> to the exact |
| 1414 | error code. The return code 1 generally (but not always) means that the |
| 1415 | key specified did not exist in the database. |
| 1416 | |
| 1417 | Other return codes are defined. See below and in the Berkeley DB |
| 1418 | documentation for details. The Berkeley DB documentation should be used |
| 1419 | as the definitive source. |
| 1420 | |
| 1421 | =item * |
| 1422 | |
| 1423 | Whenever a Berkeley DB function returns data via one of its parameters, |
| 1424 | the equivalent B<DB_File> method does exactly the same. |
| 1425 | |
| 1426 | =item * |
| 1427 | |
| 1428 | If you are careful, it is possible to mix API calls with the tied |
| 1429 | hash/array interface in the same piece of code. Although only a few of |
| 1430 | the methods used to implement the tied interface currently make use of |
| 1431 | the cursor, you should always assume that the cursor has been changed |
| 1432 | any time the tied hash/array interface is used. As an example, this |
| 1433 | code will probably not do what you expect: |
| 1434 | |
| 1435 | $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE |
| 1436 | or die "Cannot tie $filename: $!" ; |
| 1437 | |
| 1438 | # Get the first key/value pair and set the cursor |
| 1439 | $X->seq($key, $value, R_FIRST) ; |
| 1440 | |
| 1441 | # this line will modify the cursor |
| 1442 | $count = scalar keys %x ; |
| 1443 | |
| 1444 | # Get the second key/value pair. |
| 1445 | # oops, it didn't, it got the last key/value pair! |
| 1446 | $X->seq($key, $value, R_NEXT) ; |
| 1447 | |
| 1448 | The code above can be rearranged to get around the problem, like this: |
| 1449 | |
| 1450 | $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE |
| 1451 | or die "Cannot tie $filename: $!" ; |
| 1452 | |
| 1453 | # this line will modify the cursor |
| 1454 | $count = scalar keys %x ; |
| 1455 | |
| 1456 | # Get the first key/value pair and set the cursor |
| 1457 | $X->seq($key, $value, R_FIRST) ; |
| 1458 | |
| 1459 | # Get the second key/value pair. |
| 1460 | # worked this time. |
| 1461 | $X->seq($key, $value, R_NEXT) ; |
| 1462 | |
| 1463 | =back |
| 1464 | |
| 1465 | All the constants defined in L<dbopen> for use in the flags parameters |
| 1466 | in the methods defined below are also available. Refer to the Berkeley |
| 1467 | DB documentation for the precise meaning of the flags values. |
| 1468 | |
| 1469 | Below is a list of the methods available. |
| 1470 | |
| 1471 | =over 5 |
| 1472 | |
| 1473 | =item B<$status = $X-E<gt>get($key, $value [, $flags]) ;> |
| 1474 | |
| 1475 | Given a key (C<$key>) this method reads the value associated with it |
| 1476 | from the database. The value read from the database is returned in the |
| 1477 | C<$value> parameter. |
| 1478 | |
| 1479 | If the key does not exist the method returns 1. |
| 1480 | |
| 1481 | No flags are currently defined for this method. |
| 1482 | |
| 1483 | =item B<$status = $X-E<gt>put($key, $value [, $flags]) ;> |
| 1484 | |
| 1485 | Stores the key/value pair in the database. |
| 1486 | |
| 1487 | If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter |
| 1488 | will have the record number of the inserted key/value pair set. |
| 1489 | |
| 1490 | Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and |
| 1491 | R_SETCURSOR. |
| 1492 | |
| 1493 | =item B<$status = $X-E<gt>del($key [, $flags]) ;> |
| 1494 | |
| 1495 | Removes all key/value pairs with key C<$key> from the database. |
| 1496 | |
| 1497 | A return code of 1 means that the requested key was not in the |
| 1498 | database. |
| 1499 | |
| 1500 | R_CURSOR is the only valid flag at present. |
| 1501 | |
| 1502 | =item B<$status = $X-E<gt>fd ;> |
| 1503 | |
| 1504 | Returns the file descriptor for the underlying database. |
| 1505 | |
| 1506 | See L<Locking: The Trouble with fd> for an explanation for why you should |
| 1507 | not use C<fd> to lock your database. |
| 1508 | |
| 1509 | =item B<$status = $X-E<gt>seq($key, $value, $flags) ;> |
| 1510 | |
| 1511 | This interface allows sequential retrieval from the database. See |
| 1512 | L<dbopen> for full details. |
| 1513 | |
| 1514 | Both the C<$key> and C<$value> parameters will be set to the key/value |
| 1515 | pair read from the database. |
| 1516 | |
| 1517 | The flags parameter is mandatory. The valid flag values are R_CURSOR, |
| 1518 | R_FIRST, R_LAST, R_NEXT and R_PREV. |
| 1519 | |
| 1520 | =item B<$status = $X-E<gt>sync([$flags]) ;> |
| 1521 | |
| 1522 | Flushes any cached buffers to disk. |
| 1523 | |
| 1524 | R_RECNOSYNC is the only valid flag at present. |
| 1525 | |
| 1526 | =back |
| 1527 | |
| 1528 | =head1 DBM FILTERS |
| 1529 | |
| 1530 | A DBM Filter is a piece of code that is be used when you I<always> |
| 1531 | want to make the same transformation to all keys and/or values in a |
| 1532 | DBM database. |
| 1533 | |
| 1534 | There are four methods associated with DBM Filters. All work identically, |
| 1535 | and each is used to install (or uninstall) a single DBM Filter. Each |
| 1536 | expects a single parameter, namely a reference to a sub. The only |
| 1537 | difference between them is the place that the filter is installed. |
| 1538 | |
| 1539 | To summarise: |
| 1540 | |
| 1541 | =over 5 |
| 1542 | |
| 1543 | =item B<filter_store_key> |
| 1544 | |
| 1545 | If a filter has been installed with this method, it will be invoked |
| 1546 | every time you write a key to a DBM database. |
| 1547 | |
| 1548 | =item B<filter_store_value> |
| 1549 | |
| 1550 | If a filter has been installed with this method, it will be invoked |
| 1551 | every time you write a value to a DBM database. |
| 1552 | |
| 1553 | |
| 1554 | =item B<filter_fetch_key> |
| 1555 | |
| 1556 | If a filter has been installed with this method, it will be invoked |
| 1557 | every time you read a key from a DBM database. |
| 1558 | |
| 1559 | =item B<filter_fetch_value> |
| 1560 | |
| 1561 | If a filter has been installed with this method, it will be invoked |
| 1562 | every time you read a value from a DBM database. |
| 1563 | |
| 1564 | =back |
| 1565 | |
| 1566 | You can use any combination of the methods, from none, to all four. |
| 1567 | |
| 1568 | All filter methods return the existing filter, if present, or C<undef> |
| 1569 | in not. |
| 1570 | |
| 1571 | To delete a filter pass C<undef> to it. |
| 1572 | |
| 1573 | =head2 The Filter |
| 1574 | |
| 1575 | When each filter is called by Perl, a local copy of C<$_> will contain |
| 1576 | the key or value to be filtered. Filtering is achieved by modifying |
| 1577 | the contents of C<$_>. The return code from the filter is ignored. |
| 1578 | |
| 1579 | =head2 An Example -- the NULL termination problem. |
| 1580 | |
| 1581 | Consider the following scenario. You have a DBM database |
| 1582 | that you need to share with a third-party C application. The C application |
| 1583 | assumes that I<all> keys and values are NULL terminated. Unfortunately |
| 1584 | when Perl writes to DBM databases it doesn't use NULL termination, so |
| 1585 | your Perl application will have to manage NULL termination itself. When |
| 1586 | you write to the database you will have to use something like this: |
| 1587 | |
| 1588 | $hash{"$key\0"} = "$value\0" ; |
| 1589 | |
| 1590 | Similarly the NULL needs to be taken into account when you are considering |
| 1591 | the length of existing keys/values. |
| 1592 | |
| 1593 | It would be much better if you could ignore the NULL terminations issue |
| 1594 | in the main application code and have a mechanism that automatically |
| 1595 | added the terminating NULL to all keys and values whenever you write to |
| 1596 | the database and have them removed when you read from the database. As I'm |
| 1597 | sure you have already guessed, this is a problem that DBM Filters can |
| 1598 | fix very easily. |
| 1599 | |
| 1600 | use warnings ; |
| 1601 | use strict ; |
| 1602 | use DB_File ; |
| 1603 | |
| 1604 | my %hash ; |
| 1605 | my $filename = "/tmp/filt" ; |
| 1606 | unlink $filename ; |
| 1607 | |
| 1608 | my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH |
| 1609 | or die "Cannot open $filename: $!\n" ; |
| 1610 | |
| 1611 | # Install DBM Filters |
| 1612 | $db->filter_fetch_key ( sub { s/\0$// } ) ; |
| 1613 | $db->filter_store_key ( sub { $_ .= "\0" } ) ; |
| 1614 | $db->filter_fetch_value( sub { s/\0$// } ) ; |
| 1615 | $db->filter_store_value( sub { $_ .= "\0" } ) ; |
| 1616 | |
| 1617 | $hash{"abc"} = "def" ; |
| 1618 | my $a = $hash{"ABC"} ; |
| 1619 | # ... |
| 1620 | undef $db ; |
| 1621 | untie %hash ; |
| 1622 | |
| 1623 | Hopefully the contents of each of the filters should be |
| 1624 | self-explanatory. Both "fetch" filters remove the terminating NULL, |
| 1625 | and both "store" filters add a terminating NULL. |
| 1626 | |
| 1627 | |
| 1628 | =head2 Another Example -- Key is a C int. |
| 1629 | |
| 1630 | Here is another real-life example. By default, whenever Perl writes to |
| 1631 | a DBM database it always writes the key and value as strings. So when |
| 1632 | you use this: |
| 1633 | |
| 1634 | $hash{12345} = "soemthing" ; |
| 1635 | |
| 1636 | the key 12345 will get stored in the DBM database as the 5 byte string |
| 1637 | "12345". If you actually want the key to be stored in the DBM database |
| 1638 | as a C int, you will have to use C<pack> when writing, and C<unpack> |
| 1639 | when reading. |
| 1640 | |
| 1641 | Here is a DBM Filter that does it: |
| 1642 | |
| 1643 | use warnings ; |
| 1644 | use strict ; |
| 1645 | use DB_File ; |
| 1646 | my %hash ; |
| 1647 | my $filename = "/tmp/filt" ; |
| 1648 | unlink $filename ; |
| 1649 | |
| 1650 | |
| 1651 | my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH |
| 1652 | or die "Cannot open $filename: $!\n" ; |
| 1653 | |
| 1654 | $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ; |
| 1655 | $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ; |
| 1656 | $hash{123} = "def" ; |
| 1657 | # ... |
| 1658 | undef $db ; |
| 1659 | untie %hash ; |
| 1660 | |
| 1661 | This time only two filters have been used -- we only need to manipulate |
| 1662 | the contents of the key, so it wasn't necessary to install any value |
| 1663 | filters. |
| 1664 | |
| 1665 | =head1 HINTS AND TIPS |
| 1666 | |
| 1667 | |
| 1668 | =head2 Locking: The Trouble with fd |
| 1669 | |
| 1670 | Until version 1.72 of this module, the recommended technique for locking |
| 1671 | B<DB_File> databases was to flock the filehandle returned from the "fd" |
| 1672 | function. Unfortunately this technique has been shown to be fundamentally |
| 1673 | flawed (Kudos to David Harris for tracking this down). Use it at your own |
| 1674 | peril! |
| 1675 | |
| 1676 | The locking technique went like this. |
| 1677 | |
| 1678 | $db = tie(%db, 'DB_File', '/tmp/foo.db', O_CREAT|O_RDWR, 0644) |
| 1679 | || die "dbcreat /tmp/foo.db $!"; |
| 1680 | $fd = $db->fd; |
| 1681 | open(DB_FH, "+<&=$fd") || die "dup $!"; |
| 1682 | flock (DB_FH, LOCK_EX) || die "flock: $!"; |
| 1683 | ... |
| 1684 | $db{"Tom"} = "Jerry" ; |
| 1685 | ... |
| 1686 | flock(DB_FH, LOCK_UN); |
| 1687 | undef $db; |
| 1688 | untie %db; |
| 1689 | close(DB_FH); |
| 1690 | |
| 1691 | In simple terms, this is what happens: |
| 1692 | |
| 1693 | =over 5 |
| 1694 | |
| 1695 | =item 1. |
| 1696 | |
| 1697 | Use "tie" to open the database. |
| 1698 | |
| 1699 | =item 2. |
| 1700 | |
| 1701 | Lock the database with fd & flock. |
| 1702 | |
| 1703 | =item 3. |
| 1704 | |
| 1705 | Read & Write to the database. |
| 1706 | |
| 1707 | =item 4. |
| 1708 | |
| 1709 | Unlock and close the database. |
| 1710 | |
| 1711 | =back |
| 1712 | |
| 1713 | Here is the crux of the problem. A side-effect of opening the B<DB_File> |
| 1714 | database in step 2 is that an initial block from the database will get |
| 1715 | read from disk and cached in memory. |
| 1716 | |
| 1717 | To see why this is a problem, consider what can happen when two processes, |
| 1718 | say "A" and "B", both want to update the same B<DB_File> database |
| 1719 | using the locking steps outlined above. Assume process "A" has already |
| 1720 | opened the database and has a write lock, but it hasn't actually updated |
| 1721 | the database yet (it has finished step 2, but not started step 3 yet). Now |
| 1722 | process "B" tries to open the same database - step 1 will succeed, |
| 1723 | but it will block on step 2 until process "A" releases the lock. The |
| 1724 | important thing to notice here is that at this point in time both |
| 1725 | processes will have cached identical initial blocks from the database. |
| 1726 | |
| 1727 | Now process "A" updates the database and happens to change some of the |
| 1728 | data held in the initial buffer. Process "A" terminates, flushing |
| 1729 | all cached data to disk and releasing the database lock. At this point |
| 1730 | the database on disk will correctly reflect the changes made by process |
| 1731 | "A". |
| 1732 | |
| 1733 | With the lock released, process "B" can now continue. It also updates the |
| 1734 | database and unfortunately it too modifies the data that was in its |
| 1735 | initial buffer. Once that data gets flushed to disk it will overwrite |
| 1736 | some/all of the changes process "A" made to the database. |
| 1737 | |
| 1738 | The result of this scenario is at best a database that doesn't contain |
| 1739 | what you expect. At worst the database will corrupt. |
| 1740 | |
| 1741 | The above won't happen every time competing process update the same |
| 1742 | B<DB_File> database, but it does illustrate why the technique should |
| 1743 | not be used. |
| 1744 | |
| 1745 | =head2 Safe ways to lock a database |
| 1746 | |
| 1747 | Starting with version 2.x, Berkeley DB has internal support for locking. |
| 1748 | The companion module to this one, B<BerkeleyDB>, provides an interface |
| 1749 | to this locking functionality. If you are serious about locking |
| 1750 | Berkeley DB databases, I strongly recommend using B<BerkeleyDB>. |
| 1751 | |
| 1752 | If using B<BerkeleyDB> isn't an option, there are a number of modules |
| 1753 | available on CPAN that can be used to implement locking. Each one |
| 1754 | implements locking differently and has different goals in mind. It is |
| 1755 | therefore worth knowing the difference, so that you can pick the right |
| 1756 | one for your application. Here are the three locking wrappers: |
| 1757 | |
| 1758 | =over 5 |
| 1759 | |
| 1760 | =item B<Tie::DB_Lock> |
| 1761 | |
| 1762 | A B<DB_File> wrapper which creates copies of the database file for |
| 1763 | read access, so that you have a kind of a multiversioning concurrent read |
| 1764 | system. However, updates are still serial. Use for databases where reads |
| 1765 | may be lengthy and consistency problems may occur. |
| 1766 | |
| 1767 | =item B<Tie::DB_LockFile> |
| 1768 | |
| 1769 | A B<DB_File> wrapper that has the ability to lock and unlock the database |
| 1770 | while it is being used. Avoids the tie-before-flock problem by simply |
| 1771 | re-tie-ing the database when you get or drop a lock. Because of the |
| 1772 | flexibility in dropping and re-acquiring the lock in the middle of a |
| 1773 | session, this can be massaged into a system that will work with long |
| 1774 | updates and/or reads if the application follows the hints in the POD |
| 1775 | documentation. |
| 1776 | |
| 1777 | =item B<DB_File::Lock> |
| 1778 | |
| 1779 | An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile |
| 1780 | before tie-ing the database and drops the lock after the untie. Allows |
| 1781 | one to use the same lockfile for multiple databases to avoid deadlock |
| 1782 | problems, if desired. Use for databases where updates are reads are |
| 1783 | quick and simple flock locking semantics are enough. |
| 1784 | |
| 1785 | =back |
| 1786 | |
| 1787 | =head2 Sharing Databases With C Applications |
| 1788 | |
| 1789 | There is no technical reason why a Berkeley DB database cannot be |
| 1790 | shared by both a Perl and a C application. |
| 1791 | |
| 1792 | The vast majority of problems that are reported in this area boil down |
| 1793 | to the fact that C strings are NULL terminated, whilst Perl strings are |
| 1794 | not. See L<DBM FILTERS> for a generic way to work around this problem. |
| 1795 | |
| 1796 | Here is a real example. Netscape 2.0 keeps a record of the locations you |
| 1797 | visit along with the time you last visited them in a DB_HASH database. |
| 1798 | This is usually stored in the file F<~/.netscape/history.db>. The key |
| 1799 | field in the database is the location string and the value field is the |
| 1800 | time the location was last visited stored as a 4 byte binary value. |
| 1801 | |
| 1802 | If you haven't already guessed, the location string is stored with a |
| 1803 | terminating NULL. This means you need to be careful when accessing the |
| 1804 | database. |
| 1805 | |
| 1806 | Here is a snippet of code that is loosely based on Tom Christiansen's |
| 1807 | I<ggh> script (available from your nearest CPAN archive in |
| 1808 | F<authors/id/TOMC/scripts/nshist.gz>). |
| 1809 | |
| 1810 | use warnings ; |
| 1811 | use strict ; |
| 1812 | use DB_File ; |
| 1813 | use Fcntl ; |
| 1814 | |
| 1815 | use vars qw( $dotdir $HISTORY %hist_db $href $binary_time $date ) ; |
| 1816 | $dotdir = $ENV{HOME} || $ENV{LOGNAME}; |
| 1817 | |
| 1818 | $HISTORY = "$dotdir/.netscape/history.db"; |
| 1819 | |
| 1820 | tie %hist_db, 'DB_File', $HISTORY |
| 1821 | or die "Cannot open $HISTORY: $!\n" ;; |
| 1822 | |
| 1823 | # Dump the complete database |
| 1824 | while ( ($href, $binary_time) = each %hist_db ) { |
| 1825 | |
| 1826 | # remove the terminating NULL |
| 1827 | $href =~ s/\x00$// ; |
| 1828 | |
| 1829 | # convert the binary time into a user friendly string |
| 1830 | $date = localtime unpack("V", $binary_time); |
| 1831 | print "$date $href\n" ; |
| 1832 | } |
| 1833 | |
| 1834 | # check for the existence of a specific key |
| 1835 | # remember to add the NULL |
| 1836 | if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) { |
| 1837 | $date = localtime unpack("V", $binary_time) ; |
| 1838 | print "Last visited mox.perl.com on $date\n" ; |
| 1839 | } |
| 1840 | else { |
| 1841 | print "Never visited mox.perl.com\n" |
| 1842 | } |
| 1843 | |
| 1844 | untie %hist_db ; |
| 1845 | |
| 1846 | =head2 The untie() Gotcha |
| 1847 | |
| 1848 | If you make use of the Berkeley DB API, it is I<very> strongly |
| 1849 | recommended that you read L<perltie/The untie Gotcha>. |
| 1850 | |
| 1851 | Even if you don't currently make use of the API interface, it is still |
| 1852 | worth reading it. |
| 1853 | |
| 1854 | Here is an example which illustrates the problem from a B<DB_File> |
| 1855 | perspective: |
| 1856 | |
| 1857 | use DB_File ; |
| 1858 | use Fcntl ; |
| 1859 | |
| 1860 | my %x ; |
| 1861 | my $X ; |
| 1862 | |
| 1863 | $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC |
| 1864 | or die "Cannot tie first time: $!" ; |
| 1865 | |
| 1866 | $x{123} = 456 ; |
| 1867 | |
| 1868 | untie %x ; |
| 1869 | |
| 1870 | tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT |
| 1871 | or die "Cannot tie second time: $!" ; |
| 1872 | |
| 1873 | untie %x ; |
| 1874 | |
| 1875 | When run, the script will produce this error message: |
| 1876 | |
| 1877 | Cannot tie second time: Invalid argument at bad.file line 14. |
| 1878 | |
| 1879 | Although the error message above refers to the second tie() statement |
| 1880 | in the script, the source of the problem is really with the untie() |
| 1881 | statement that precedes it. |
| 1882 | |
| 1883 | Having read L<perltie> you will probably have already guessed that the |
| 1884 | error is caused by the extra copy of the tied object stored in C<$X>. |
| 1885 | If you haven't, then the problem boils down to the fact that the |
| 1886 | B<DB_File> destructor, DESTROY, will not be called until I<all> |
| 1887 | references to the tied object are destroyed. Both the tied variable, |
| 1888 | C<%x>, and C<$X> above hold a reference to the object. The call to |
| 1889 | untie() will destroy the first, but C<$X> still holds a valid |
| 1890 | reference, so the destructor will not get called and the database file |
| 1891 | F<tst.fil> will remain open. The fact that Berkeley DB then reports the |
| 1892 | attempt to open a database that is already open via the catch-all |
| 1893 | "Invalid argument" doesn't help. |
| 1894 | |
| 1895 | If you run the script with the C<-w> flag the error message becomes: |
| 1896 | |
| 1897 | untie attempted while 1 inner references still exist at bad.file line 12. |
| 1898 | Cannot tie second time: Invalid argument at bad.file line 14. |
| 1899 | |
| 1900 | which pinpoints the real problem. Finally the script can now be |
| 1901 | modified to fix the original problem by destroying the API object |
| 1902 | before the untie: |
| 1903 | |
| 1904 | ... |
| 1905 | $x{123} = 456 ; |
| 1906 | |
| 1907 | undef $X ; |
| 1908 | untie %x ; |
| 1909 | |
| 1910 | $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT |
| 1911 | ... |
| 1912 | |
| 1913 | |
| 1914 | =head1 COMMON QUESTIONS |
| 1915 | |
| 1916 | =head2 Why is there Perl source in my database? |
| 1917 | |
| 1918 | If you look at the contents of a database file created by DB_File, |
| 1919 | there can sometimes be part of a Perl script included in it. |
| 1920 | |
| 1921 | This happens because Berkeley DB uses dynamic memory to allocate |
| 1922 | buffers which will subsequently be written to the database file. Being |
| 1923 | dynamic, the memory could have been used for anything before DB |
| 1924 | malloced it. As Berkeley DB doesn't clear the memory once it has been |
| 1925 | allocated, the unused portions will contain random junk. In the case |
| 1926 | where a Perl script gets written to the database, the random junk will |
| 1927 | correspond to an area of dynamic memory that happened to be used during |
| 1928 | the compilation of the script. |
| 1929 | |
| 1930 | Unless you don't like the possibility of there being part of your Perl |
| 1931 | scripts embedded in a database file, this is nothing to worry about. |
| 1932 | |
| 1933 | =head2 How do I store complex data structures with DB_File? |
| 1934 | |
| 1935 | Although B<DB_File> cannot do this directly, there is a module which |
| 1936 | can layer transparently over B<DB_File> to accomplish this feat. |
| 1937 | |
| 1938 | Check out the MLDBM module, available on CPAN in the directory |
| 1939 | F<modules/by-module/MLDBM>. |
| 1940 | |
| 1941 | =head2 What does "Invalid Argument" mean? |
| 1942 | |
| 1943 | You will get this error message when one of the parameters in the |
| 1944 | C<tie> call is wrong. Unfortunately there are quite a few parameters to |
| 1945 | get wrong, so it can be difficult to figure out which one it is. |
| 1946 | |
| 1947 | Here are a couple of possibilities: |
| 1948 | |
| 1949 | =over 5 |
| 1950 | |
| 1951 | =item 1. |
| 1952 | |
| 1953 | Attempting to reopen a database without closing it. |
| 1954 | |
| 1955 | =item 2. |
| 1956 | |
| 1957 | Using the O_WRONLY flag. |
| 1958 | |
| 1959 | =back |
| 1960 | |
| 1961 | =head2 What does "Bareword 'DB_File' not allowed" mean? |
| 1962 | |
| 1963 | You will encounter this particular error message when you have the |
| 1964 | C<strict 'subs'> pragma (or the full strict pragma) in your script. |
| 1965 | Consider this script: |
| 1966 | |
| 1967 | use warnings ; |
| 1968 | use strict ; |
| 1969 | use DB_File ; |
| 1970 | use vars qw(%x) ; |
| 1971 | tie %x, DB_File, "filename" ; |
| 1972 | |
| 1973 | Running it produces the error in question: |
| 1974 | |
| 1975 | Bareword "DB_File" not allowed while "strict subs" in use |
| 1976 | |
| 1977 | To get around the error, place the word C<DB_File> in either single or |
| 1978 | double quotes, like this: |
| 1979 | |
| 1980 | tie %x, "DB_File", "filename" ; |
| 1981 | |
| 1982 | Although it might seem like a real pain, it is really worth the effort |
| 1983 | of having a C<use strict> in all your scripts. |
| 1984 | |
| 1985 | =head1 REFERENCES |
| 1986 | |
| 1987 | Articles that are either about B<DB_File> or make use of it. |
| 1988 | |
| 1989 | =over 5 |
| 1990 | |
| 1991 | =item 1. |
| 1992 | |
| 1993 | I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com), |
| 1994 | Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41 |
| 1995 | |
| 1996 | =back |
| 1997 | |
| 1998 | =head1 HISTORY |
| 1999 | |
| 2000 | Moved to the Changes file. |
| 2001 | |
| 2002 | =head1 BUGS |
| 2003 | |
| 2004 | Some older versions of Berkeley DB had problems with fixed length |
| 2005 | records using the RECNO file format. This problem has been fixed since |
| 2006 | version 1.85 of Berkeley DB. |
| 2007 | |
| 2008 | I am sure there are bugs in the code. If you do find any, or can |
| 2009 | suggest any enhancements, I would welcome your comments. |
| 2010 | |
| 2011 | =head1 AVAILABILITY |
| 2012 | |
| 2013 | B<DB_File> comes with the standard Perl source distribution. Look in |
| 2014 | the directory F<ext/DB_File>. Given the amount of time between releases |
| 2015 | of Perl the version that ships with Perl is quite likely to be out of |
| 2016 | date, so the most recent version can always be found on CPAN (see |
| 2017 | L<perlmod/CPAN> for details), in the directory |
| 2018 | F<modules/by-module/DB_File>. |
| 2019 | |
| 2020 | This version of B<DB_File> will work with either version 1.x, 2.x or |
| 2021 | 3.x of Berkeley DB, but is limited to the functionality provided by |
| 2022 | version 1. |
| 2023 | |
| 2024 | The official web site for Berkeley DB is F<http://www.sleepycat.com>. |
| 2025 | All versions of Berkeley DB are available there. |
| 2026 | |
| 2027 | Alternatively, Berkeley DB version 1 is available at your nearest CPAN |
| 2028 | archive in F<src/misc/db.1.85.tar.gz>. |
| 2029 | |
| 2030 | If you are running IRIX, then get Berkeley DB version 1 from |
| 2031 | F<http://reality.sgi.com/ariel>. It has the patches necessary to |
| 2032 | compile properly on IRIX 5.3. |
| 2033 | |
| 2034 | =head1 COPYRIGHT |
| 2035 | |
| 2036 | Copyright (c) 1995-1999 Paul Marquess. All rights reserved. This program |
| 2037 | is free software; you can redistribute it and/or modify it under the |
| 2038 | same terms as Perl itself. |
| 2039 | |
| 2040 | Although B<DB_File> is covered by the Perl license, the library it |
| 2041 | makes use of, namely Berkeley DB, is not. Berkeley DB has its own |
| 2042 | copyright and its own license. Please take the time to read it. |
| 2043 | |
| 2044 | Here are are few words taken from the Berkeley DB FAQ (at |
| 2045 | F<http://www.sleepycat.com>) regarding the license: |
| 2046 | |
| 2047 | Do I have to license DB to use it in Perl scripts? |
| 2048 | |
| 2049 | No. The Berkeley DB license requires that software that uses |
| 2050 | Berkeley DB be freely redistributable. In the case of Perl, that |
| 2051 | software is Perl, and not your scripts. Any Perl scripts that you |
| 2052 | write are your property, including scripts that make use of |
| 2053 | Berkeley DB. Neither the Perl license nor the Berkeley DB license |
| 2054 | place any restriction on what you may do with them. |
| 2055 | |
| 2056 | If you are in any doubt about the license situation, contact either the |
| 2057 | Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details. |
| 2058 | |
| 2059 | |
| 2060 | =head1 SEE ALSO |
| 2061 | |
| 2062 | L<perl(1)>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>, |
| 2063 | L<dbmfilter> |
| 2064 | |
| 2065 | =head1 AUTHOR |
| 2066 | |
| 2067 | The DB_File interface was written by Paul Marquess |
| 2068 | E<lt>Paul.Marquess@btinternet.comE<gt>. |
| 2069 | Questions about the DB system itself may be addressed to |
| 2070 | E<lt>db@sleepycat.com<gt>. |
| 2071 | |
| 2072 | =cut |