| 1 | package DBM_Filter ; |
| 2 | |
| 3 | use strict; |
| 4 | use warnings; |
| 5 | our $VERSION = '0.02'; |
| 6 | |
| 7 | package Tie::Hash ; |
| 8 | |
| 9 | use strict; |
| 10 | use warnings; |
| 11 | |
| 12 | use Carp; |
| 13 | |
| 14 | |
| 15 | our %LayerStack = (); |
| 16 | our %origDESTROY = (); |
| 17 | |
| 18 | our %Filters = map { $_, undef } qw( |
| 19 | Fetch_Key |
| 20 | Fetch_Value |
| 21 | Store_Key |
| 22 | Store_Value |
| 23 | ); |
| 24 | |
| 25 | our %Options = map { $_, 1 } qw( |
| 26 | fetch |
| 27 | store |
| 28 | ); |
| 29 | |
| 30 | #sub Filter_Enable |
| 31 | #{ |
| 32 | #} |
| 33 | # |
| 34 | #sub Filter_Disable |
| 35 | #{ |
| 36 | #} |
| 37 | |
| 38 | sub Filtered |
| 39 | { |
| 40 | my $this = shift; |
| 41 | return defined $LayerStack{$this} ; |
| 42 | } |
| 43 | |
| 44 | sub Filter_Pop |
| 45 | { |
| 46 | my $this = shift; |
| 47 | my $stack = $LayerStack{$this} || return undef ; |
| 48 | my $filter = pop @{ $stack }; |
| 49 | |
| 50 | # remove the filter hooks if this is the last filter to pop |
| 51 | if ( @{ $stack } == 0 ) { |
| 52 | $this->filter_store_key ( undef ); |
| 53 | $this->filter_store_value( undef ); |
| 54 | $this->filter_fetch_key ( undef ); |
| 55 | $this->filter_fetch_value( undef ); |
| 56 | delete $LayerStack{$this}; |
| 57 | } |
| 58 | |
| 59 | return $filter; |
| 60 | } |
| 61 | |
| 62 | sub Filter_Key_Push |
| 63 | { |
| 64 | &_do_Filter_Push; |
| 65 | } |
| 66 | |
| 67 | sub Filter_Value_Push |
| 68 | { |
| 69 | &_do_Filter_Push; |
| 70 | } |
| 71 | |
| 72 | |
| 73 | sub Filter_Push |
| 74 | { |
| 75 | &_do_Filter_Push; |
| 76 | } |
| 77 | |
| 78 | sub _do_Filter_Push |
| 79 | { |
| 80 | my $this = shift; |
| 81 | my %callbacks = (); |
| 82 | my $caller = (caller(1))[3]; |
| 83 | $caller =~ s/^.*:://; |
| 84 | |
| 85 | croak "$caller: no parameters present" unless @_ ; |
| 86 | |
| 87 | if ( ! $Options{lc $_[0]} ) { |
| 88 | my $class = shift; |
| 89 | my @params = @_; |
| 90 | |
| 91 | # if $class already contains "::", don't prefix "DBM_Filter::" |
| 92 | $class = "DBM_Filter::$class" unless $class =~ /::/; |
| 93 | |
| 94 | no strict 'refs'; |
| 95 | # does the "DBM_Filter::$class" exist? |
| 96 | if ( ! defined %{ "${class}::"} ) { |
| 97 | # Nope, so try to load it. |
| 98 | eval " require $class ; " ; |
| 99 | croak "$caller: Cannot Load DBM Filter '$class': $@" if $@; |
| 100 | } |
| 101 | |
| 102 | my $fetch = *{ "${class}::Fetch" }{CODE}; |
| 103 | my $store = *{ "${class}::Store" }{CODE}; |
| 104 | my $filter = *{ "${class}::Filter" }{CODE}; |
| 105 | use strict 'refs'; |
| 106 | |
| 107 | my $count = defined($filter) + defined($store) + defined($fetch) ; |
| 108 | |
| 109 | if ( $count == 0 ) |
| 110 | { croak "$caller: No methods (Filter, Fetch or Store) found in class '$class'" } |
| 111 | elsif ( $count == 1 && ! defined $filter) { |
| 112 | my $need = defined($fetch) ? 'Store' : 'Fetch'; |
| 113 | croak "$caller: Missing method '$need' in class '$class'" ; |
| 114 | } |
| 115 | elsif ( $count >= 2 && defined $filter) |
| 116 | { croak "$caller: Can't mix Filter with Store and Fetch in class '$class'" } |
| 117 | |
| 118 | if (defined $filter) { |
| 119 | my $callbacks = &{ $filter }(@params); |
| 120 | croak "$caller: '${class}::Filter' did not return a hash reference" |
| 121 | unless ref $callbacks && ref $callbacks eq 'HASH'; |
| 122 | %callbacks = %{ $callbacks } ; |
| 123 | } |
| 124 | else { |
| 125 | $callbacks{Fetch} = $fetch; |
| 126 | $callbacks{Store} = $store; |
| 127 | } |
| 128 | } |
| 129 | else { |
| 130 | croak "$caller: not even params" unless @_ % 2 == 0; |
| 131 | %callbacks = @_; |
| 132 | } |
| 133 | |
| 134 | my %filters = %Filters ; |
| 135 | my @got = (); |
| 136 | while (my ($k, $v) = each %callbacks ) |
| 137 | { |
| 138 | my $key = $k; |
| 139 | $k = lc $k; |
| 140 | if ($k eq 'fetch') { |
| 141 | push @got, 'Fetch'; |
| 142 | if ($caller eq 'Filter_Push') |
| 143 | { $filters{Fetch_Key} = $filters{Fetch_Value} = $v } |
| 144 | elsif ($caller eq 'Filter_Key_Push') |
| 145 | { $filters{Fetch_Key} = $v } |
| 146 | elsif ($caller eq 'Filter_Value_Push') |
| 147 | { $filters{Fetch_Value} = $v } |
| 148 | } |
| 149 | elsif ($k eq 'store') { |
| 150 | push @got, 'Store'; |
| 151 | if ($caller eq 'Filter_Push') |
| 152 | { $filters{Store_Key} = $filters{Store_Value} = $v } |
| 153 | elsif ($caller eq 'Filter_Key_Push') |
| 154 | { $filters{Store_Key} = $v } |
| 155 | elsif ($caller eq 'Filter_Value_Push') |
| 156 | { $filters{Store_Value} = $v } |
| 157 | } |
| 158 | else |
| 159 | { croak "$caller: Unknown key '$key'" } |
| 160 | |
| 161 | croak "$caller: value associated with key '$key' is not a code reference" |
| 162 | unless ref $v && ref $v eq 'CODE'; |
| 163 | } |
| 164 | |
| 165 | if ( @got != 2 ) { |
| 166 | push @got, 'neither' if @got == 0 ; |
| 167 | croak "$caller: expected both Store & Fetch - got @got"; |
| 168 | } |
| 169 | |
| 170 | # remember the class |
| 171 | push @{ $LayerStack{$this} }, \%filters ; |
| 172 | |
| 173 | my $str_this = "$this" ; # Avoid a closure with $this in the subs below |
| 174 | |
| 175 | $this->filter_store_key ( sub { store_hook($str_this, 'Store_Key') }); |
| 176 | $this->filter_store_value( sub { store_hook($str_this, 'Store_Value') }); |
| 177 | $this->filter_fetch_key ( sub { fetch_hook($str_this, 'Fetch_Key') }); |
| 178 | $this->filter_fetch_value( sub { fetch_hook($str_this, 'Fetch_Value') }); |
| 179 | |
| 180 | # Hijack the callers DESTROY method |
| 181 | $this =~ /^(.*)=/; |
| 182 | my $type = $1 ; |
| 183 | no strict 'refs'; |
| 184 | if ( *{ "${type}::DESTROY" }{CODE} ne \&MyDESTROY ) |
| 185 | { |
| 186 | $origDESTROY{$type} = *{ "${type}::DESTROY" }{CODE}; |
| 187 | no warnings 'redefine'; |
| 188 | *{ "${type}::DESTROY" } = \&MyDESTROY ; |
| 189 | } |
| 190 | } |
| 191 | |
| 192 | sub store_hook |
| 193 | { |
| 194 | my $this = shift ; |
| 195 | my $type = shift ; |
| 196 | foreach my $layer (@{ $LayerStack{$this} }) |
| 197 | { |
| 198 | &{ $layer->{$type} }() if defined $layer->{$type} ; |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | sub fetch_hook |
| 203 | { |
| 204 | my $this = shift ; |
| 205 | my $type = shift ; |
| 206 | foreach my $layer (reverse @{ $LayerStack{$this} }) |
| 207 | { |
| 208 | &{ $layer->{$type} }() if defined $layer->{$type} ; |
| 209 | } |
| 210 | } |
| 211 | |
| 212 | sub MyDESTROY |
| 213 | { |
| 214 | my $this = shift ; |
| 215 | delete $LayerStack{$this} ; |
| 216 | |
| 217 | # call real DESTROY |
| 218 | $this =~ /^(.*)=/; |
| 219 | &{ $origDESTROY{$1} }($this); |
| 220 | } |
| 221 | |
| 222 | 1; |
| 223 | |
| 224 | __END__ |
| 225 | |
| 226 | =head1 NAME |
| 227 | |
| 228 | DBM_Filter -- Filter DBM keys/values |
| 229 | |
| 230 | =head1 SYNOPSIS |
| 231 | |
| 232 | use DBM_Filter ; |
| 233 | use SDBM_File; # or DB_File, or GDBM_File, or NDBM_File, or ODBM_File |
| 234 | |
| 235 | $db = tie %hash, ... |
| 236 | |
| 237 | $db->Filter_Push(Fetch => sub {...}, |
| 238 | Store => sub {...}); |
| 239 | |
| 240 | $db->Filter_Push('my_filter1'); |
| 241 | $db->Filter_Push('my_filter2', params...); |
| 242 | |
| 243 | $db->Filter_Key_Push(...) ; |
| 244 | $db->Filter_Value_Push(...) ; |
| 245 | |
| 246 | $db->Filter_Pop(); |
| 247 | $db->Filtered(); |
| 248 | |
| 249 | package DBM_Filter::my_filter1; |
| 250 | |
| 251 | sub Store { ... } |
| 252 | sub Fetch { ... } |
| 253 | |
| 254 | 1; |
| 255 | |
| 256 | package DBM_Filter::my_filter2; |
| 257 | |
| 258 | sub Filter |
| 259 | { |
| 260 | my @opts = @_; |
| 261 | ... |
| 262 | return ( |
| 263 | sub Store { ... }, |
| 264 | sub Fetch { ... } ); |
| 265 | } |
| 266 | |
| 267 | 1; |
| 268 | |
| 269 | =head1 DESCRIPTION |
| 270 | |
| 271 | This module provides an interface that allows filters to be applied |
| 272 | to tied Hashes associated with DBM files. It builds on the DBM Filter |
| 273 | hooks that are present in all the *DB*_File modules included with the |
| 274 | standard Perl source distribution from version 5.6.1 onwards. In addition |
| 275 | to the *DB*_File modules distributed with Perl, the BerkeleyDB module, |
| 276 | available on CPAN, supports the DBM Filter hooks. See L<perldbmfilter> |
| 277 | for more details on the DBM Filter hooks. |
| 278 | |
| 279 | =head1 What is a DBM Filter? |
| 280 | |
| 281 | A DBM Filter allows the keys and/or values in a tied hash to be modified |
| 282 | by some user-defined code just before it is written to the DBM file and |
| 283 | just after it is read back from the DBM file. For example, this snippet |
| 284 | of code |
| 285 | |
| 286 | $some_hash{"abc"} = 42; |
| 287 | |
| 288 | could potentially trigger two filters, one for the writing of the key |
| 289 | "abc" and another for writing the value 42. Similarly, this snippet |
| 290 | |
| 291 | my ($key, $value) = each %some_hash |
| 292 | |
| 293 | will trigger two filters, one for the reading of the key and one for |
| 294 | the reading of the value. |
| 295 | |
| 296 | Like the existing DBM Filter functionality, this module arranges for the |
| 297 | C<$_> variable to be populated with the key or value that a filter will |
| 298 | check. This usually means that most DBM filters tend to be very short. |
| 299 | |
| 300 | =head2 So what's new? |
| 301 | |
| 302 | The main enhancements over the standard DBM Filter hooks are: |
| 303 | |
| 304 | =over 4 |
| 305 | |
| 306 | =item * |
| 307 | |
| 308 | A cleaner interface. |
| 309 | |
| 310 | =item * |
| 311 | |
| 312 | The ability to easily apply multiple filters to a single DBM file. |
| 313 | |
| 314 | =item * |
| 315 | |
| 316 | The ability to create "canned" filters. These allow commonly used filters |
| 317 | to be packaged into a stand-alone module. |
| 318 | |
| 319 | =back |
| 320 | |
| 321 | =head1 METHODS |
| 322 | |
| 323 | This module will arrange for the following methods to be available via |
| 324 | the object returned from the C<tie> call. |
| 325 | |
| 326 | =head2 $db->Filter_Push() |
| 327 | |
| 328 | =head2 $db->Filter_Key_Push() |
| 329 | |
| 330 | =head2 $db->Filter_Value_Push() |
| 331 | |
| 332 | Add a filter to filter stack for the database, C<$db>. The three formats |
| 333 | vary only in whether they apply to the DBM key, the DBM value or both. |
| 334 | |
| 335 | =over 5 |
| 336 | |
| 337 | =item Filter_Push |
| 338 | |
| 339 | The filter is applied to I<both> keys and values. |
| 340 | |
| 341 | =item Filter_Key_Push |
| 342 | |
| 343 | The filter is applied to the key I<only>. |
| 344 | |
| 345 | =item Filter_Value_Push |
| 346 | |
| 347 | The filter is applied to the value I<only>. |
| 348 | |
| 349 | =back |
| 350 | |
| 351 | |
| 352 | =head2 $db->Filter_Pop() |
| 353 | |
| 354 | Removes the last filter that was applied to the DBM file associated with |
| 355 | C<$db>, if present. |
| 356 | |
| 357 | =head2 $db->Filtered() |
| 358 | |
| 359 | Returns TRUE if there are any filters applied to the DBM associated |
| 360 | with C<$db>. Otherwise returns FALSE. |
| 361 | |
| 362 | |
| 363 | |
| 364 | =head1 Writing a Filter |
| 365 | |
| 366 | Filters can be created in two main ways |
| 367 | |
| 368 | =head2 Immediate Filters |
| 369 | |
| 370 | An immediate filter allows you to specify the filter code to be used |
| 371 | at the point where the filter is applied to a dbm. In this mode the |
| 372 | Filter_*_Push methods expects to receive exactly two parameters. |
| 373 | |
| 374 | my $db = tie %hash, 'SDBM_File', ... |
| 375 | $db->Filter_Push( Store => sub { }, |
| 376 | Fetch => sub { }); |
| 377 | |
| 378 | The code reference associated with C<Store> will be called before any |
| 379 | key/value is written to the database and the code reference associated |
| 380 | with C<Fetch> will be called after any key/value is read from the |
| 381 | database. |
| 382 | |
| 383 | For example, here is a sample filter that adds a trailing NULL character |
| 384 | to all strings before they are written to the DBM file, and removes the |
| 385 | trailing NULL when they are read from the DBM file |
| 386 | |
| 387 | my $db = tie %hash, 'SDBM_File', ... |
| 388 | $db->Filter_Push( Store => sub { $_ .= "\x00" ; }, |
| 389 | Fetch => sub { s/\x00$// ; }); |
| 390 | |
| 391 | |
| 392 | Points to note: |
| 393 | |
| 394 | =over 5 |
| 395 | |
| 396 | =item 1. |
| 397 | |
| 398 | Both the Store and Fetch filters manipulate C<$_>. |
| 399 | |
| 400 | =back |
| 401 | |
| 402 | =head2 Canned Filters |
| 403 | |
| 404 | Immediate filters are useful for one-off situations. For more generic |
| 405 | problems it can be useful to package the filter up in its own module. |
| 406 | |
| 407 | The usage is for a canned filter is: |
| 408 | |
| 409 | $db->Filter_Push("name", params) |
| 410 | |
| 411 | where |
| 412 | |
| 413 | =over 5 |
| 414 | |
| 415 | =item "name" |
| 416 | |
| 417 | is the name of the module to load. If the string specified does not |
| 418 | contain the package separator characters "::", it is assumed to refer to |
| 419 | the full module name "DBM_Filter::name". This means that the full names |
| 420 | for canned filters, "null" and "utf8", included with this module are: |
| 421 | |
| 422 | DBM_Filter::null |
| 423 | DBM_Filter::utf8 |
| 424 | |
| 425 | =item params |
| 426 | |
| 427 | any optional parameters that need to be sent to the filter. See the |
| 428 | encode filter for an example of a module that uses parameters. |
| 429 | |
| 430 | =back |
| 431 | |
| 432 | The module that implements the canned filter can take one of two |
| 433 | forms. Here is a template for the first |
| 434 | |
| 435 | package DBM_Filter::null ; |
| 436 | |
| 437 | use strict; |
| 438 | use warnings; |
| 439 | |
| 440 | sub Store |
| 441 | { |
| 442 | # store code here |
| 443 | } |
| 444 | |
| 445 | sub Fetch |
| 446 | { |
| 447 | # fetch code here |
| 448 | } |
| 449 | |
| 450 | 1; |
| 451 | |
| 452 | |
| 453 | Notes: |
| 454 | |
| 455 | =over 5 |
| 456 | |
| 457 | =item 1. |
| 458 | |
| 459 | The package name uses the C<DBM_Filter::> prefix. |
| 460 | |
| 461 | =item 2. |
| 462 | |
| 463 | The module I<must> have both a Store and a Fetch method. If only one is |
| 464 | present, or neither are present, a fatal error will be thrown. |
| 465 | |
| 466 | =back |
| 467 | |
| 468 | The second form allows the filter to hold state information using a |
| 469 | closure, thus: |
| 470 | |
| 471 | package DBM_Filter::encoding ; |
| 472 | |
| 473 | use strict; |
| 474 | use warnings; |
| 475 | |
| 476 | sub Filter |
| 477 | { |
| 478 | my @params = @_ ; |
| 479 | |
| 480 | ... |
| 481 | return { |
| 482 | Store => sub { $_ = $encoding->encode($_) }, |
| 483 | Fetch => sub { $_ = $encoding->decode($_) } |
| 484 | } ; |
| 485 | } |
| 486 | |
| 487 | 1; |
| 488 | |
| 489 | |
| 490 | In this instance the "Store" and "Fetch" methods are encapsulated inside a |
| 491 | "Filter" method. |
| 492 | |
| 493 | |
| 494 | =head1 Filters Included |
| 495 | |
| 496 | A number of canned filers are provided with this module. They cover a |
| 497 | number of the main areas that filters are needed when interfacing with |
| 498 | DBM files. They also act as templates for your own filters. |
| 499 | |
| 500 | The filter included are: |
| 501 | |
| 502 | =over 5 |
| 503 | |
| 504 | =item * utf8 |
| 505 | |
| 506 | This module will ensure that all data written to the DBM will be encoded |
| 507 | in UTF-8. |
| 508 | |
| 509 | This module needs the Encode module. |
| 510 | |
| 511 | =item * encode |
| 512 | |
| 513 | Allows you to choose the character encoding will be store in the DBM file. |
| 514 | |
| 515 | =item * compress |
| 516 | |
| 517 | This filter will compress all data before it is written to the database |
| 518 | and uncompressed it on reading. |
| 519 | |
| 520 | This module needs Compress::Zlib. |
| 521 | |
| 522 | =item * int32 |
| 523 | |
| 524 | This module is used when interoperating with a C/C++ application that |
| 525 | uses a C int as either the key and/or value in the DBM file. |
| 526 | |
| 527 | =item * null |
| 528 | |
| 529 | This module ensures that all data written to the DBM file is null |
| 530 | terminated. This is useful when you have a perl script that needs |
| 531 | to interoperate with a DBM file that a C program also uses. A fairly |
| 532 | common issue is for the C application to include the terminating null |
| 533 | in a string when it writes to the DBM file. This filter will ensure that |
| 534 | all data written to the DBM file can be read by the C application. |
| 535 | |
| 536 | =back |
| 537 | |
| 538 | =head1 NOTES |
| 539 | |
| 540 | =head2 Maintain Round Trip Integrity |
| 541 | |
| 542 | When writing a DBM filter it is I<very> important to ensure that it is |
| 543 | possible to retrieve all data that you have written when the DBM filter |
| 544 | is in place. In practice, this means that whatever transformation is |
| 545 | applied to the data in the Store method, the I<exact> inverse operation |
| 546 | should be applied in the Fetch method. |
| 547 | |
| 548 | If you don't provide an exact inverse transformation, you will find that |
| 549 | code like this will not behave as you expect. |
| 550 | |
| 551 | while (my ($k, $v) = each %hash) |
| 552 | { |
| 553 | ... |
| 554 | } |
| 555 | |
| 556 | Depending on the transformation, you will find that one or more of the |
| 557 | following will happen |
| 558 | |
| 559 | =over 5 |
| 560 | |
| 561 | =item 1 |
| 562 | |
| 563 | The loop will never terminate. |
| 564 | |
| 565 | =item 2 |
| 566 | |
| 567 | Too few records will be retrieved. |
| 568 | |
| 569 | =item 3 |
| 570 | |
| 571 | Too many will be retrieved. |
| 572 | |
| 573 | =item 4 |
| 574 | |
| 575 | The loop will do the right thing for a while, but it will unexpectedly fail. |
| 576 | |
| 577 | =back |
| 578 | |
| 579 | =head2 Don't mix filtered & non-filtered data in the same database file. |
| 580 | |
| 581 | This is just a restatement of the previous section. Unless you are |
| 582 | completely certain you know what you are doing, avoid mixing filtered & |
| 583 | non-filtered data. |
| 584 | |
| 585 | =head1 EXAMPLE |
| 586 | |
| 587 | Say you need to interoperate with a legacy C application that stores |
| 588 | keys as C ints and the values and null terminated UTF-8 strings. Here |
| 589 | is how you would set that up |
| 590 | |
| 591 | my $db = tie %hash, 'SDBM_File', ... |
| 592 | |
| 593 | $db->Filter_Key_Push('int32') ; |
| 594 | |
| 595 | $db->Filter_Value_Push('utf8'); |
| 596 | $db->Filter_Value_Push('null'); |
| 597 | |
| 598 | =head1 SEE ALSO |
| 599 | |
| 600 | <DB_File>, L<GDBM_File>, L<NDBM_File>, L<ODBM_File>, L<SDBM_File>, L<perldbmfilter> |
| 601 | |
| 602 | =head1 AUTHOR |
| 603 | |
| 604 | Paul Marquess <pmqs@cpan.org> |
| 605 | |