Commit | Line | Data |
---|---|---|
0e9b1cbd PM |
1 | package DBM_Filter ; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
902fde96 | 5 | our $VERSION = '0.03'; |
0e9b1cbd PM |
6 | |
7 | package Tie::Hash ; | |
8 | ||
9 | use strict; | |
10 | use warnings; | |
11 | ||
12 | use Carp; | |
13 | ||
14 | ||
15 | our %LayerStack = (); | |
16 | our %origDESTROY = (); | |
17 | ||
18 | our %Filters = map { $_, undef } qw( | |
19 | Fetch_Key | |
20 | Fetch_Value | |
21 | Store_Key | |
22 | Store_Value | |
23 | ); | |
24 | ||
25 | our %Options = map { $_, 1 } qw( | |
26 | fetch | |
27 | store | |
28 | ); | |
29 | ||
30 | #sub Filter_Enable | |
31 | #{ | |
32 | #} | |
33 | # | |
34 | #sub Filter_Disable | |
35 | #{ | |
36 | #} | |
37 | ||
38 | sub Filtered | |
39 | { | |
40 | my $this = shift; | |
41 | return defined $LayerStack{$this} ; | |
42 | } | |
43 | ||
44 | sub Filter_Pop | |
45 | { | |
46 | my $this = shift; | |
47 | my $stack = $LayerStack{$this} || return undef ; | |
48 | my $filter = pop @{ $stack }; | |
49 | ||
50 | # remove the filter hooks if this is the last filter to pop | |
51 | if ( @{ $stack } == 0 ) { | |
52 | $this->filter_store_key ( undef ); | |
53 | $this->filter_store_value( undef ); | |
54 | $this->filter_fetch_key ( undef ); | |
55 | $this->filter_fetch_value( undef ); | |
56 | delete $LayerStack{$this}; | |
57 | } | |
58 | ||
59 | return $filter; | |
60 | } | |
61 | ||
62 | sub Filter_Key_Push | |
63 | { | |
64 | &_do_Filter_Push; | |
65 | } | |
66 | ||
67 | sub Filter_Value_Push | |
68 | { | |
69 | &_do_Filter_Push; | |
70 | } | |
71 | ||
72 | ||
73 | sub Filter_Push | |
74 | { | |
75 | &_do_Filter_Push; | |
76 | } | |
77 | ||
78 | sub _do_Filter_Push | |
79 | { | |
80 | my $this = shift; | |
81 | my %callbacks = (); | |
82 | my $caller = (caller(1))[3]; | |
83 | $caller =~ s/^.*:://; | |
84 | ||
85 | croak "$caller: no parameters present" unless @_ ; | |
86 | ||
87 | if ( ! $Options{lc $_[0]} ) { | |
88 | my $class = shift; | |
89 | my @params = @_; | |
90 | ||
91 | # if $class already contains "::", don't prefix "DBM_Filter::" | |
92 | $class = "DBM_Filter::$class" unless $class =~ /::/; | |
93 | ||
d9f30342 | 94 | no strict 'refs'; |
0e9b1cbd | 95 | # does the "DBM_Filter::$class" exist? |
902fde96 | 96 | if ( ! %{ "${class}::"} ) { |
0e9b1cbd PM |
97 | # Nope, so try to load it. |
98 | eval " require $class ; " ; | |
99 | croak "$caller: Cannot Load DBM Filter '$class': $@" if $@; | |
100 | } | |
101 | ||
0e9b1cbd PM |
102 | my $fetch = *{ "${class}::Fetch" }{CODE}; |
103 | my $store = *{ "${class}::Store" }{CODE}; | |
104 | my $filter = *{ "${class}::Filter" }{CODE}; | |
105 | use strict 'refs'; | |
106 | ||
107 | my $count = defined($filter) + defined($store) + defined($fetch) ; | |
108 | ||
109 | if ( $count == 0 ) | |
110 | { croak "$caller: No methods (Filter, Fetch or Store) found in class '$class'" } | |
111 | elsif ( $count == 1 && ! defined $filter) { | |
112 | my $need = defined($fetch) ? 'Store' : 'Fetch'; | |
113 | croak "$caller: Missing method '$need' in class '$class'" ; | |
114 | } | |
115 | elsif ( $count >= 2 && defined $filter) | |
116 | { croak "$caller: Can't mix Filter with Store and Fetch in class '$class'" } | |
117 | ||
118 | if (defined $filter) { | |
119 | my $callbacks = &{ $filter }(@params); | |
120 | croak "$caller: '${class}::Filter' did not return a hash reference" | |
121 | unless ref $callbacks && ref $callbacks eq 'HASH'; | |
122 | %callbacks = %{ $callbacks } ; | |
123 | } | |
124 | else { | |
125 | $callbacks{Fetch} = $fetch; | |
126 | $callbacks{Store} = $store; | |
127 | } | |
128 | } | |
129 | else { | |
130 | croak "$caller: not even params" unless @_ % 2 == 0; | |
131 | %callbacks = @_; | |
132 | } | |
133 | ||
134 | my %filters = %Filters ; | |
135 | my @got = (); | |
136 | while (my ($k, $v) = each %callbacks ) | |
137 | { | |
138 | my $key = $k; | |
139 | $k = lc $k; | |
140 | if ($k eq 'fetch') { | |
141 | push @got, 'Fetch'; | |
142 | if ($caller eq 'Filter_Push') | |
143 | { $filters{Fetch_Key} = $filters{Fetch_Value} = $v } | |
144 | elsif ($caller eq 'Filter_Key_Push') | |
145 | { $filters{Fetch_Key} = $v } | |
146 | elsif ($caller eq 'Filter_Value_Push') | |
147 | { $filters{Fetch_Value} = $v } | |
148 | } | |
149 | elsif ($k eq 'store') { | |
150 | push @got, 'Store'; | |
151 | if ($caller eq 'Filter_Push') | |
152 | { $filters{Store_Key} = $filters{Store_Value} = $v } | |
153 | elsif ($caller eq 'Filter_Key_Push') | |
154 | { $filters{Store_Key} = $v } | |
155 | elsif ($caller eq 'Filter_Value_Push') | |
156 | { $filters{Store_Value} = $v } | |
157 | } | |
158 | else | |
159 | { croak "$caller: Unknown key '$key'" } | |
160 | ||
161 | croak "$caller: value associated with key '$key' is not a code reference" | |
162 | unless ref $v && ref $v eq 'CODE'; | |
163 | } | |
164 | ||
165 | if ( @got != 2 ) { | |
166 | push @got, 'neither' if @got == 0 ; | |
167 | croak "$caller: expected both Store & Fetch - got @got"; | |
168 | } | |
169 | ||
170 | # remember the class | |
171 | push @{ $LayerStack{$this} }, \%filters ; | |
172 | ||
173 | my $str_this = "$this" ; # Avoid a closure with $this in the subs below | |
174 | ||
175 | $this->filter_store_key ( sub { store_hook($str_this, 'Store_Key') }); | |
176 | $this->filter_store_value( sub { store_hook($str_this, 'Store_Value') }); | |
177 | $this->filter_fetch_key ( sub { fetch_hook($str_this, 'Fetch_Key') }); | |
178 | $this->filter_fetch_value( sub { fetch_hook($str_this, 'Fetch_Value') }); | |
179 | ||
180 | # Hijack the callers DESTROY method | |
181 | $this =~ /^(.*)=/; | |
182 | my $type = $1 ; | |
183 | no strict 'refs'; | |
184 | if ( *{ "${type}::DESTROY" }{CODE} ne \&MyDESTROY ) | |
185 | { | |
186 | $origDESTROY{$type} = *{ "${type}::DESTROY" }{CODE}; | |
187 | no warnings 'redefine'; | |
188 | *{ "${type}::DESTROY" } = \&MyDESTROY ; | |
189 | } | |
190 | } | |
191 | ||
192 | sub store_hook | |
193 | { | |
194 | my $this = shift ; | |
195 | my $type = shift ; | |
196 | foreach my $layer (@{ $LayerStack{$this} }) | |
197 | { | |
198 | &{ $layer->{$type} }() if defined $layer->{$type} ; | |
199 | } | |
200 | } | |
201 | ||
202 | sub fetch_hook | |
203 | { | |
204 | my $this = shift ; | |
205 | my $type = shift ; | |
206 | foreach my $layer (reverse @{ $LayerStack{$this} }) | |
207 | { | |
208 | &{ $layer->{$type} }() if defined $layer->{$type} ; | |
209 | } | |
210 | } | |
211 | ||
212 | sub MyDESTROY | |
213 | { | |
214 | my $this = shift ; | |
215 | delete $LayerStack{$this} ; | |
216 | ||
217 | # call real DESTROY | |
218 | $this =~ /^(.*)=/; | |
219 | &{ $origDESTROY{$1} }($this); | |
220 | } | |
221 | ||
222 | 1; | |
223 | ||
224 | __END__ | |
225 | ||
226 | =head1 NAME | |
227 | ||
228 | DBM_Filter -- Filter DBM keys/values | |
229 | ||
230 | =head1 SYNOPSIS | |
231 | ||
232 | use DBM_Filter ; | |
233 | use SDBM_File; # or DB_File, or GDBM_File, or NDBM_File, or ODBM_File | |
234 | ||
235 | $db = tie %hash, ... | |
236 | ||
237 | $db->Filter_Push(Fetch => sub {...}, | |
238 | Store => sub {...}); | |
239 | ||
240 | $db->Filter_Push('my_filter1'); | |
241 | $db->Filter_Push('my_filter2', params...); | |
242 | ||
243 | $db->Filter_Key_Push(...) ; | |
244 | $db->Filter_Value_Push(...) ; | |
245 | ||
246 | $db->Filter_Pop(); | |
247 | $db->Filtered(); | |
248 | ||
249 | package DBM_Filter::my_filter1; | |
250 | ||
251 | sub Store { ... } | |
252 | sub Fetch { ... } | |
253 | ||
254 | 1; | |
255 | ||
256 | package DBM_Filter::my_filter2; | |
257 | ||
258 | sub Filter | |
259 | { | |
260 | my @opts = @_; | |
261 | ... | |
262 | return ( | |
263 | sub Store { ... }, | |
264 | sub Fetch { ... } ); | |
265 | } | |
266 | ||
267 | 1; | |
268 | ||
269 | =head1 DESCRIPTION | |
270 | ||
271 | This module provides an interface that allows filters to be applied | |
272 | to tied Hashes associated with DBM files. It builds on the DBM Filter | |
273 | hooks that are present in all the *DB*_File modules included with the | |
274 | standard Perl source distribution from version 5.6.1 onwards. In addition | |
275 | to the *DB*_File modules distributed with Perl, the BerkeleyDB module, | |
276 | available on CPAN, supports the DBM Filter hooks. See L<perldbmfilter> | |
277 | for more details on the DBM Filter hooks. | |
278 | ||
279 | =head1 What is a DBM Filter? | |
280 | ||
281 | A DBM Filter allows the keys and/or values in a tied hash to be modified | |
282 | by some user-defined code just before it is written to the DBM file and | |
283 | just after it is read back from the DBM file. For example, this snippet | |
284 | of code | |
285 | ||
286 | $some_hash{"abc"} = 42; | |
287 | ||
288 | could potentially trigger two filters, one for the writing of the key | |
289 | "abc" and another for writing the value 42. Similarly, this snippet | |
290 | ||
291 | my ($key, $value) = each %some_hash | |
292 | ||
293 | will trigger two filters, one for the reading of the key and one for | |
294 | the reading of the value. | |
295 | ||
296 | Like the existing DBM Filter functionality, this module arranges for the | |
297 | C<$_> variable to be populated with the key or value that a filter will | |
298 | check. This usually means that most DBM filters tend to be very short. | |
299 | ||
300 | =head2 So what's new? | |
301 | ||
302 | The main enhancements over the standard DBM Filter hooks are: | |
303 | ||
304 | =over 4 | |
305 | ||
306 | =item * | |
307 | ||
308 | A cleaner interface. | |
309 | ||
310 | =item * | |
311 | ||
312 | The ability to easily apply multiple filters to a single DBM file. | |
313 | ||
314 | =item * | |
315 | ||
316 | The ability to create "canned" filters. These allow commonly used filters | |
317 | to be packaged into a stand-alone module. | |
318 | ||
319 | =back | |
320 | ||
321 | =head1 METHODS | |
322 | ||
323 | This module will arrange for the following methods to be available via | |
324 | the object returned from the C<tie> call. | |
325 | ||
326 | =head2 $db->Filter_Push() | |
327 | ||
328 | =head2 $db->Filter_Key_Push() | |
329 | ||
330 | =head2 $db->Filter_Value_Push() | |
331 | ||
332 | Add a filter to filter stack for the database, C<$db>. The three formats | |
333 | vary only in whether they apply to the DBM key, the DBM value or both. | |
334 | ||
335 | =over 5 | |
336 | ||
337 | =item Filter_Push | |
338 | ||
339 | The filter is applied to I<both> keys and values. | |
340 | ||
341 | =item Filter_Key_Push | |
342 | ||
343 | The filter is applied to the key I<only>. | |
344 | ||
345 | =item Filter_Value_Push | |
346 | ||
347 | The filter is applied to the value I<only>. | |
348 | ||
349 | =back | |
350 | ||
351 | ||
352 | =head2 $db->Filter_Pop() | |
353 | ||
354 | Removes the last filter that was applied to the DBM file associated with | |
355 | C<$db>, if present. | |
356 | ||
357 | =head2 $db->Filtered() | |
358 | ||
359 | Returns TRUE if there are any filters applied to the DBM associated | |
360 | with C<$db>. Otherwise returns FALSE. | |
361 | ||
362 | ||
363 | ||
364 | =head1 Writing a Filter | |
365 | ||
366 | Filters can be created in two main ways | |
367 | ||
368 | =head2 Immediate Filters | |
369 | ||
370 | An immediate filter allows you to specify the filter code to be used | |
371 | at the point where the filter is applied to a dbm. In this mode the | |
372 | Filter_*_Push methods expects to receive exactly two parameters. | |
373 | ||
374 | my $db = tie %hash, 'SDBM_File', ... | |
375 | $db->Filter_Push( Store => sub { }, | |
376 | Fetch => sub { }); | |
377 | ||
378 | The code reference associated with C<Store> will be called before any | |
379 | key/value is written to the database and the code reference associated | |
380 | with C<Fetch> will be called after any key/value is read from the | |
381 | database. | |
382 | ||
383 | For example, here is a sample filter that adds a trailing NULL character | |
384 | to all strings before they are written to the DBM file, and removes the | |
385 | trailing NULL when they are read from the DBM file | |
386 | ||
387 | my $db = tie %hash, 'SDBM_File', ... | |
388 | $db->Filter_Push( Store => sub { $_ .= "\x00" ; }, | |
389 | Fetch => sub { s/\x00$// ; }); | |
390 | ||
391 | ||
392 | Points to note: | |
393 | ||
394 | =over 5 | |
395 | ||
396 | =item 1. | |
397 | ||
398 | Both the Store and Fetch filters manipulate C<$_>. | |
399 | ||
400 | =back | |
401 | ||
402 | =head2 Canned Filters | |
403 | ||
404 | Immediate filters are useful for one-off situations. For more generic | |
405 | problems it can be useful to package the filter up in its own module. | |
406 | ||
407 | The usage is for a canned filter is: | |
408 | ||
409 | $db->Filter_Push("name", params) | |
410 | ||
411 | where | |
412 | ||
413 | =over 5 | |
414 | ||
415 | =item "name" | |
416 | ||
417 | is the name of the module to load. If the string specified does not | |
418 | contain the package separator characters "::", it is assumed to refer to | |
419 | the full module name "DBM_Filter::name". This means that the full names | |
420 | for canned filters, "null" and "utf8", included with this module are: | |
421 | ||
422 | DBM_Filter::null | |
423 | DBM_Filter::utf8 | |
424 | ||
425 | =item params | |
426 | ||
427 | any optional parameters that need to be sent to the filter. See the | |
428 | encode filter for an example of a module that uses parameters. | |
429 | ||
430 | =back | |
431 | ||
432 | The module that implements the canned filter can take one of two | |
433 | forms. Here is a template for the first | |
434 | ||
435 | package DBM_Filter::null ; | |
436 | ||
437 | use strict; | |
438 | use warnings; | |
439 | ||
440 | sub Store | |
441 | { | |
442 | # store code here | |
443 | } | |
444 | ||
445 | sub Fetch | |
446 | { | |
447 | # fetch code here | |
448 | } | |
449 | ||
450 | 1; | |
451 | ||
452 | ||
453 | Notes: | |
454 | ||
455 | =over 5 | |
456 | ||
457 | =item 1. | |
458 | ||
459 | The package name uses the C<DBM_Filter::> prefix. | |
460 | ||
461 | =item 2. | |
462 | ||
463 | The module I<must> have both a Store and a Fetch method. If only one is | |
464 | present, or neither are present, a fatal error will be thrown. | |
465 | ||
466 | =back | |
467 | ||
468 | The second form allows the filter to hold state information using a | |
469 | closure, thus: | |
470 | ||
471 | package DBM_Filter::encoding ; | |
472 | ||
473 | use strict; | |
474 | use warnings; | |
475 | ||
476 | sub Filter | |
477 | { | |
478 | my @params = @_ ; | |
479 | ||
480 | ... | |
481 | return { | |
482 | Store => sub { $_ = $encoding->encode($_) }, | |
483 | Fetch => sub { $_ = $encoding->decode($_) } | |
484 | } ; | |
485 | } | |
486 | ||
487 | 1; | |
488 | ||
489 | ||
490 | In this instance the "Store" and "Fetch" methods are encapsulated inside a | |
491 | "Filter" method. | |
492 | ||
493 | ||
494 | =head1 Filters Included | |
495 | ||
496 | A number of canned filers are provided with this module. They cover a | |
497 | number of the main areas that filters are needed when interfacing with | |
498 | DBM files. They also act as templates for your own filters. | |
499 | ||
500 | The filter included are: | |
501 | ||
502 | =over 5 | |
503 | ||
504 | =item * utf8 | |
505 | ||
506 | This module will ensure that all data written to the DBM will be encoded | |
507 | in UTF-8. | |
508 | ||
509 | This module needs the Encode module. | |
510 | ||
511 | =item * encode | |
512 | ||
513 | Allows you to choose the character encoding will be store in the DBM file. | |
514 | ||
515 | =item * compress | |
516 | ||
517 | This filter will compress all data before it is written to the database | |
518 | and uncompressed it on reading. | |
519 | ||
520 | This module needs Compress::Zlib. | |
521 | ||
522 | =item * int32 | |
523 | ||
524 | This module is used when interoperating with a C/C++ application that | |
525 | uses a C int as either the key and/or value in the DBM file. | |
526 | ||
527 | =item * null | |
528 | ||
529 | This module ensures that all data written to the DBM file is null | |
530 | terminated. This is useful when you have a perl script that needs | |
531 | to interoperate with a DBM file that a C program also uses. A fairly | |
532 | common issue is for the C application to include the terminating null | |
533 | in a string when it writes to the DBM file. This filter will ensure that | |
534 | all data written to the DBM file can be read by the C application. | |
535 | ||
536 | =back | |
537 | ||
538 | =head1 NOTES | |
539 | ||
540 | =head2 Maintain Round Trip Integrity | |
541 | ||
542 | When writing a DBM filter it is I<very> important to ensure that it is | |
543 | possible to retrieve all data that you have written when the DBM filter | |
544 | is in place. In practice, this means that whatever transformation is | |
545 | applied to the data in the Store method, the I<exact> inverse operation | |
546 | should be applied in the Fetch method. | |
547 | ||
548 | If you don't provide an exact inverse transformation, you will find that | |
549 | code like this will not behave as you expect. | |
550 | ||
551 | while (my ($k, $v) = each %hash) | |
552 | { | |
553 | ... | |
554 | } | |
555 | ||
556 | Depending on the transformation, you will find that one or more of the | |
557 | following will happen | |
558 | ||
559 | =over 5 | |
560 | ||
561 | =item 1 | |
562 | ||
563 | The loop will never terminate. | |
564 | ||
565 | =item 2 | |
566 | ||
567 | Too few records will be retrieved. | |
568 | ||
569 | =item 3 | |
570 | ||
571 | Too many will be retrieved. | |
572 | ||
573 | =item 4 | |
574 | ||
575 | The loop will do the right thing for a while, but it will unexpectedly fail. | |
576 | ||
577 | =back | |
578 | ||
579 | =head2 Don't mix filtered & non-filtered data in the same database file. | |
580 | ||
581 | This is just a restatement of the previous section. Unless you are | |
582 | completely certain you know what you are doing, avoid mixing filtered & | |
583 | non-filtered data. | |
584 | ||
585 | =head1 EXAMPLE | |
586 | ||
587 | Say you need to interoperate with a legacy C application that stores | |
588 | keys as C ints and the values and null terminated UTF-8 strings. Here | |
589 | is how you would set that up | |
590 | ||
591 | my $db = tie %hash, 'SDBM_File', ... | |
592 | ||
593 | $db->Filter_Key_Push('int32') ; | |
594 | ||
595 | $db->Filter_Value_Push('utf8'); | |
596 | $db->Filter_Value_Push('null'); | |
597 | ||
598 | =head1 SEE ALSO | |
599 | ||
600 | <DB_File>, L<GDBM_File>, L<NDBM_File>, L<ODBM_File>, L<SDBM_File>, L<perldbmfilter> | |
601 | ||
602 | =head1 AUTHOR | |
603 | ||
604 | Paul Marquess <pmqs@cpan.org> | |
605 |