Commit | Line | Data |
---|---|---|
0e9b1cbd PM |
1 | package DBM_Filter ; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
73f081c4 | 5 | our $VERSION = '0.04'; |
0e9b1cbd PM |
6 | |
7 | package Tie::Hash ; | |
8 | ||
9 | use strict; | |
10 | use warnings; | |
11 | ||
12 | use Carp; | |
13 | ||
14 | ||
15 | our %LayerStack = (); | |
16 | our %origDESTROY = (); | |
17 | ||
18 | our %Filters = map { $_, undef } qw( | |
19 | Fetch_Key | |
20 | Fetch_Value | |
21 | Store_Key | |
22 | Store_Value | |
23 | ); | |
24 | ||
25 | our %Options = map { $_, 1 } qw( | |
26 | fetch | |
27 | store | |
28 | ); | |
29 | ||
30 | #sub Filter_Enable | |
31 | #{ | |
32 | #} | |
33 | # | |
34 | #sub Filter_Disable | |
35 | #{ | |
36 | #} | |
37 | ||
38 | sub Filtered | |
39 | { | |
40 | my $this = shift; | |
41 | return defined $LayerStack{$this} ; | |
42 | } | |
43 | ||
44 | sub Filter_Pop | |
45 | { | |
46 | my $this = shift; | |
47 | my $stack = $LayerStack{$this} || return undef ; | |
48 | my $filter = pop @{ $stack }; | |
49 | ||
50 | # remove the filter hooks if this is the last filter to pop | |
51 | if ( @{ $stack } == 0 ) { | |
52 | $this->filter_store_key ( undef ); | |
53 | $this->filter_store_value( undef ); | |
54 | $this->filter_fetch_key ( undef ); | |
55 | $this->filter_fetch_value( undef ); | |
56 | delete $LayerStack{$this}; | |
57 | } | |
58 | ||
59 | return $filter; | |
60 | } | |
61 | ||
62 | sub Filter_Key_Push | |
63 | { | |
64 | &_do_Filter_Push; | |
65 | } | |
66 | ||
67 | sub Filter_Value_Push | |
68 | { | |
69 | &_do_Filter_Push; | |
70 | } | |
71 | ||
72 | ||
73 | sub Filter_Push | |
74 | { | |
75 | &_do_Filter_Push; | |
76 | } | |
77 | ||
78 | sub _do_Filter_Push | |
79 | { | |
80 | my $this = shift; | |
81 | my %callbacks = (); | |
82 | my $caller = (caller(1))[3]; | |
83 | $caller =~ s/^.*:://; | |
84 | ||
85 | croak "$caller: no parameters present" unless @_ ; | |
86 | ||
87 | if ( ! $Options{lc $_[0]} ) { | |
88 | my $class = shift; | |
89 | my @params = @_; | |
90 | ||
91 | # if $class already contains "::", don't prefix "DBM_Filter::" | |
92 | $class = "DBM_Filter::$class" unless $class =~ /::/; | |
93 | ||
d9f30342 | 94 | no strict 'refs'; |
0e9b1cbd | 95 | # does the "DBM_Filter::$class" exist? |
902fde96 | 96 | if ( ! %{ "${class}::"} ) { |
0e9b1cbd PM |
97 | # Nope, so try to load it. |
98 | eval " require $class ; " ; | |
99 | croak "$caller: Cannot Load DBM Filter '$class': $@" if $@; | |
100 | } | |
101 | ||
0e9b1cbd PM |
102 | my $fetch = *{ "${class}::Fetch" }{CODE}; |
103 | my $store = *{ "${class}::Store" }{CODE}; | |
104 | my $filter = *{ "${class}::Filter" }{CODE}; | |
105 | use strict 'refs'; | |
106 | ||
107 | my $count = defined($filter) + defined($store) + defined($fetch) ; | |
108 | ||
109 | if ( $count == 0 ) | |
110 | { croak "$caller: No methods (Filter, Fetch or Store) found in class '$class'" } | |
111 | elsif ( $count == 1 && ! defined $filter) { | |
112 | my $need = defined($fetch) ? 'Store' : 'Fetch'; | |
113 | croak "$caller: Missing method '$need' in class '$class'" ; | |
114 | } | |
115 | elsif ( $count >= 2 && defined $filter) | |
116 | { croak "$caller: Can't mix Filter with Store and Fetch in class '$class'" } | |
117 | ||
118 | if (defined $filter) { | |
119 | my $callbacks = &{ $filter }(@params); | |
120 | croak "$caller: '${class}::Filter' did not return a hash reference" | |
121 | unless ref $callbacks && ref $callbacks eq 'HASH'; | |
122 | %callbacks = %{ $callbacks } ; | |
123 | } | |
124 | else { | |
125 | $callbacks{Fetch} = $fetch; | |
126 | $callbacks{Store} = $store; | |
127 | } | |
128 | } | |
129 | else { | |
130 | croak "$caller: not even params" unless @_ % 2 == 0; | |
131 | %callbacks = @_; | |
132 | } | |
133 | ||
134 | my %filters = %Filters ; | |
135 | my @got = (); | |
136 | while (my ($k, $v) = each %callbacks ) | |
137 | { | |
138 | my $key = $k; | |
139 | $k = lc $k; | |
140 | if ($k eq 'fetch') { | |
141 | push @got, 'Fetch'; | |
142 | if ($caller eq 'Filter_Push') | |
143 | { $filters{Fetch_Key} = $filters{Fetch_Value} = $v } | |
144 | elsif ($caller eq 'Filter_Key_Push') | |
145 | { $filters{Fetch_Key} = $v } | |
146 | elsif ($caller eq 'Filter_Value_Push') | |
147 | { $filters{Fetch_Value} = $v } | |
148 | } | |
149 | elsif ($k eq 'store') { | |
150 | push @got, 'Store'; | |
151 | if ($caller eq 'Filter_Push') | |
152 | { $filters{Store_Key} = $filters{Store_Value} = $v } | |
153 | elsif ($caller eq 'Filter_Key_Push') | |
154 | { $filters{Store_Key} = $v } | |
155 | elsif ($caller eq 'Filter_Value_Push') | |
156 | { $filters{Store_Value} = $v } | |
157 | } | |
158 | else | |
159 | { croak "$caller: Unknown key '$key'" } | |
160 | ||
161 | croak "$caller: value associated with key '$key' is not a code reference" | |
162 | unless ref $v && ref $v eq 'CODE'; | |
163 | } | |
164 | ||
165 | if ( @got != 2 ) { | |
166 | push @got, 'neither' if @got == 0 ; | |
167 | croak "$caller: expected both Store & Fetch - got @got"; | |
168 | } | |
169 | ||
170 | # remember the class | |
171 | push @{ $LayerStack{$this} }, \%filters ; | |
172 | ||
173 | my $str_this = "$this" ; # Avoid a closure with $this in the subs below | |
174 | ||
175 | $this->filter_store_key ( sub { store_hook($str_this, 'Store_Key') }); | |
176 | $this->filter_store_value( sub { store_hook($str_this, 'Store_Value') }); | |
177 | $this->filter_fetch_key ( sub { fetch_hook($str_this, 'Fetch_Key') }); | |
178 | $this->filter_fetch_value( sub { fetch_hook($str_this, 'Fetch_Value') }); | |
179 | ||
180 | # Hijack the callers DESTROY method | |
181 | $this =~ /^(.*)=/; | |
182 | my $type = $1 ; | |
183 | no strict 'refs'; | |
184 | if ( *{ "${type}::DESTROY" }{CODE} ne \&MyDESTROY ) | |
185 | { | |
186 | $origDESTROY{$type} = *{ "${type}::DESTROY" }{CODE}; | |
187 | no warnings 'redefine'; | |
188 | *{ "${type}::DESTROY" } = \&MyDESTROY ; | |
189 | } | |
190 | } | |
191 | ||
192 | sub store_hook | |
193 | { | |
194 | my $this = shift ; | |
195 | my $type = shift ; | |
196 | foreach my $layer (@{ $LayerStack{$this} }) | |
197 | { | |
198 | &{ $layer->{$type} }() if defined $layer->{$type} ; | |
199 | } | |
200 | } | |
201 | ||
202 | sub fetch_hook | |
203 | { | |
204 | my $this = shift ; | |
205 | my $type = shift ; | |
206 | foreach my $layer (reverse @{ $LayerStack{$this} }) | |
207 | { | |
208 | &{ $layer->{$type} }() if defined $layer->{$type} ; | |
209 | } | |
210 | } | |
211 | ||
212 | sub MyDESTROY | |
213 | { | |
214 | my $this = shift ; | |
215 | delete $LayerStack{$this} ; | |
216 | ||
217 | # call real DESTROY | |
218 | $this =~ /^(.*)=/; | |
219 | &{ $origDESTROY{$1} }($this); | |
220 | } | |
221 | ||
222 | 1; | |
223 | ||
224 | __END__ | |
225 | ||
226 | =head1 NAME | |
227 | ||
228 | DBM_Filter -- Filter DBM keys/values | |
229 | ||
230 | =head1 SYNOPSIS | |
231 | ||
232 | use DBM_Filter ; | |
233 | use SDBM_File; # or DB_File, or GDBM_File, or NDBM_File, or ODBM_File | |
234 | ||
235 | $db = tie %hash, ... | |
236 | ||
237 | $db->Filter_Push(Fetch => sub {...}, | |
238 | Store => sub {...}); | |
239 | ||
240 | $db->Filter_Push('my_filter1'); | |
241 | $db->Filter_Push('my_filter2', params...); | |
242 | ||
243 | $db->Filter_Key_Push(...) ; | |
244 | $db->Filter_Value_Push(...) ; | |
245 | ||
246 | $db->Filter_Pop(); | |
247 | $db->Filtered(); | |
248 | ||
249 | package DBM_Filter::my_filter1; | |
250 | ||
251 | sub Store { ... } | |
252 | sub Fetch { ... } | |
253 | ||
254 | 1; | |
255 | ||
256 | package DBM_Filter::my_filter2; | |
257 | ||
258 | sub Filter | |
259 | { | |
260 | my @opts = @_; | |
261 | ... | |
262 | return ( | |
263 | sub Store { ... }, | |
264 | sub Fetch { ... } ); | |
265 | } | |
266 | ||
267 | 1; | |
268 | ||
269 | =head1 DESCRIPTION | |
270 | ||
271 | This module provides an interface that allows filters to be applied | |
272 | to tied Hashes associated with DBM files. It builds on the DBM Filter | |
273 | hooks that are present in all the *DB*_File modules included with the | |
274 | standard Perl source distribution from version 5.6.1 onwards. In addition | |
275 | to the *DB*_File modules distributed with Perl, the BerkeleyDB module, | |
276 | available on CPAN, supports the DBM Filter hooks. See L<perldbmfilter> | |
277 | for more details on the DBM Filter hooks. | |
278 | ||
279 | =head1 What is a DBM Filter? | |
280 | ||
281 | A DBM Filter allows the keys and/or values in a tied hash to be modified | |
282 | by some user-defined code just before it is written to the DBM file and | |
283 | just after it is read back from the DBM file. For example, this snippet | |
284 | of code | |
285 | ||
286 | $some_hash{"abc"} = 42; | |
287 | ||
288 | could potentially trigger two filters, one for the writing of the key | |
289 | "abc" and another for writing the value 42. Similarly, this snippet | |
290 | ||
291 | my ($key, $value) = each %some_hash | |
292 | ||
293 | will trigger two filters, one for the reading of the key and one for | |
294 | the reading of the value. | |
295 | ||
296 | Like the existing DBM Filter functionality, this module arranges for the | |
297 | C<$_> variable to be populated with the key or value that a filter will | |
298 | check. This usually means that most DBM filters tend to be very short. | |
299 | ||
300 | =head2 So what's new? | |
301 | ||
302 | The main enhancements over the standard DBM Filter hooks are: | |
303 | ||
304 | =over 4 | |
305 | ||
306 | =item * | |
307 | ||
308 | A cleaner interface. | |
309 | ||
310 | =item * | |
311 | ||
312 | The ability to easily apply multiple filters to a single DBM file. | |
313 | ||
314 | =item * | |
315 | ||
316 | The ability to create "canned" filters. These allow commonly used filters | |
317 | to be packaged into a stand-alone module. | |
318 | ||
319 | =back | |
320 | ||
321 | =head1 METHODS | |
322 | ||
323 | This module will arrange for the following methods to be available via | |
324 | the object returned from the C<tie> call. | |
325 | ||
2d51747b | 326 | =head2 $db->Filter_Push() / $db->Filter_Key_Push() / $db->Filter_Value_Push() |
0e9b1cbd PM |
327 | |
328 | Add a filter to filter stack for the database, C<$db>. The three formats | |
329 | vary only in whether they apply to the DBM key, the DBM value or both. | |
330 | ||
331 | =over 5 | |
332 | ||
333 | =item Filter_Push | |
334 | ||
335 | The filter is applied to I<both> keys and values. | |
336 | ||
337 | =item Filter_Key_Push | |
338 | ||
339 | The filter is applied to the key I<only>. | |
340 | ||
341 | =item Filter_Value_Push | |
342 | ||
343 | The filter is applied to the value I<only>. | |
344 | ||
345 | =back | |
346 | ||
347 | ||
348 | =head2 $db->Filter_Pop() | |
349 | ||
350 | Removes the last filter that was applied to the DBM file associated with | |
351 | C<$db>, if present. | |
352 | ||
353 | =head2 $db->Filtered() | |
354 | ||
355 | Returns TRUE if there are any filters applied to the DBM associated | |
356 | with C<$db>. Otherwise returns FALSE. | |
357 | ||
358 | ||
359 | ||
360 | =head1 Writing a Filter | |
361 | ||
362 | Filters can be created in two main ways | |
363 | ||
364 | =head2 Immediate Filters | |
365 | ||
366 | An immediate filter allows you to specify the filter code to be used | |
367 | at the point where the filter is applied to a dbm. In this mode the | |
368 | Filter_*_Push methods expects to receive exactly two parameters. | |
369 | ||
370 | my $db = tie %hash, 'SDBM_File', ... | |
371 | $db->Filter_Push( Store => sub { }, | |
372 | Fetch => sub { }); | |
373 | ||
374 | The code reference associated with C<Store> will be called before any | |
375 | key/value is written to the database and the code reference associated | |
376 | with C<Fetch> will be called after any key/value is read from the | |
377 | database. | |
378 | ||
379 | For example, here is a sample filter that adds a trailing NULL character | |
380 | to all strings before they are written to the DBM file, and removes the | |
381 | trailing NULL when they are read from the DBM file | |
382 | ||
383 | my $db = tie %hash, 'SDBM_File', ... | |
384 | $db->Filter_Push( Store => sub { $_ .= "\x00" ; }, | |
385 | Fetch => sub { s/\x00$// ; }); | |
386 | ||
387 | ||
388 | Points to note: | |
389 | ||
390 | =over 5 | |
391 | ||
392 | =item 1. | |
393 | ||
394 | Both the Store and Fetch filters manipulate C<$_>. | |
395 | ||
396 | =back | |
397 | ||
398 | =head2 Canned Filters | |
399 | ||
400 | Immediate filters are useful for one-off situations. For more generic | |
401 | problems it can be useful to package the filter up in its own module. | |
402 | ||
403 | The usage is for a canned filter is: | |
404 | ||
405 | $db->Filter_Push("name", params) | |
406 | ||
407 | where | |
408 | ||
409 | =over 5 | |
410 | ||
411 | =item "name" | |
412 | ||
413 | is the name of the module to load. If the string specified does not | |
414 | contain the package separator characters "::", it is assumed to refer to | |
415 | the full module name "DBM_Filter::name". This means that the full names | |
416 | for canned filters, "null" and "utf8", included with this module are: | |
417 | ||
418 | DBM_Filter::null | |
419 | DBM_Filter::utf8 | |
420 | ||
421 | =item params | |
422 | ||
423 | any optional parameters that need to be sent to the filter. See the | |
424 | encode filter for an example of a module that uses parameters. | |
425 | ||
426 | =back | |
427 | ||
428 | The module that implements the canned filter can take one of two | |
429 | forms. Here is a template for the first | |
430 | ||
431 | package DBM_Filter::null ; | |
432 | ||
433 | use strict; | |
434 | use warnings; | |
435 | ||
436 | sub Store | |
437 | { | |
438 | # store code here | |
439 | } | |
440 | ||
441 | sub Fetch | |
442 | { | |
443 | # fetch code here | |
444 | } | |
445 | ||
446 | 1; | |
447 | ||
448 | ||
449 | Notes: | |
450 | ||
451 | =over 5 | |
452 | ||
453 | =item 1. | |
454 | ||
455 | The package name uses the C<DBM_Filter::> prefix. | |
456 | ||
457 | =item 2. | |
458 | ||
459 | The module I<must> have both a Store and a Fetch method. If only one is | |
460 | present, or neither are present, a fatal error will be thrown. | |
461 | ||
462 | =back | |
463 | ||
464 | The second form allows the filter to hold state information using a | |
465 | closure, thus: | |
466 | ||
467 | package DBM_Filter::encoding ; | |
468 | ||
469 | use strict; | |
470 | use warnings; | |
471 | ||
472 | sub Filter | |
473 | { | |
474 | my @params = @_ ; | |
475 | ||
476 | ... | |
477 | return { | |
478 | Store => sub { $_ = $encoding->encode($_) }, | |
479 | Fetch => sub { $_ = $encoding->decode($_) } | |
480 | } ; | |
481 | } | |
482 | ||
483 | 1; | |
484 | ||
485 | ||
486 | In this instance the "Store" and "Fetch" methods are encapsulated inside a | |
487 | "Filter" method. | |
488 | ||
489 | ||
490 | =head1 Filters Included | |
491 | ||
492 | A number of canned filers are provided with this module. They cover a | |
493 | number of the main areas that filters are needed when interfacing with | |
494 | DBM files. They also act as templates for your own filters. | |
495 | ||
496 | The filter included are: | |
497 | ||
498 | =over 5 | |
499 | ||
500 | =item * utf8 | |
501 | ||
502 | This module will ensure that all data written to the DBM will be encoded | |
503 | in UTF-8. | |
504 | ||
505 | This module needs the Encode module. | |
506 | ||
507 | =item * encode | |
508 | ||
509 | Allows you to choose the character encoding will be store in the DBM file. | |
510 | ||
511 | =item * compress | |
512 | ||
513 | This filter will compress all data before it is written to the database | |
514 | and uncompressed it on reading. | |
515 | ||
516 | This module needs Compress::Zlib. | |
517 | ||
518 | =item * int32 | |
519 | ||
520 | This module is used when interoperating with a C/C++ application that | |
521 | uses a C int as either the key and/or value in the DBM file. | |
522 | ||
523 | =item * null | |
524 | ||
525 | This module ensures that all data written to the DBM file is null | |
526 | terminated. This is useful when you have a perl script that needs | |
527 | to interoperate with a DBM file that a C program also uses. A fairly | |
528 | common issue is for the C application to include the terminating null | |
529 | in a string when it writes to the DBM file. This filter will ensure that | |
530 | all data written to the DBM file can be read by the C application. | |
531 | ||
532 | =back | |
533 | ||
534 | =head1 NOTES | |
535 | ||
536 | =head2 Maintain Round Trip Integrity | |
537 | ||
538 | When writing a DBM filter it is I<very> important to ensure that it is | |
539 | possible to retrieve all data that you have written when the DBM filter | |
540 | is in place. In practice, this means that whatever transformation is | |
541 | applied to the data in the Store method, the I<exact> inverse operation | |
542 | should be applied in the Fetch method. | |
543 | ||
544 | If you don't provide an exact inverse transformation, you will find that | |
545 | code like this will not behave as you expect. | |
546 | ||
547 | while (my ($k, $v) = each %hash) | |
548 | { | |
549 | ... | |
550 | } | |
551 | ||
552 | Depending on the transformation, you will find that one or more of the | |
553 | following will happen | |
554 | ||
555 | =over 5 | |
556 | ||
557 | =item 1 | |
558 | ||
559 | The loop will never terminate. | |
560 | ||
561 | =item 2 | |
562 | ||
563 | Too few records will be retrieved. | |
564 | ||
565 | =item 3 | |
566 | ||
567 | Too many will be retrieved. | |
568 | ||
569 | =item 4 | |
570 | ||
571 | The loop will do the right thing for a while, but it will unexpectedly fail. | |
572 | ||
573 | =back | |
574 | ||
575 | =head2 Don't mix filtered & non-filtered data in the same database file. | |
576 | ||
577 | This is just a restatement of the previous section. Unless you are | |
578 | completely certain you know what you are doing, avoid mixing filtered & | |
579 | non-filtered data. | |
580 | ||
581 | =head1 EXAMPLE | |
582 | ||
583 | Say you need to interoperate with a legacy C application that stores | |
584 | keys as C ints and the values and null terminated UTF-8 strings. Here | |
585 | is how you would set that up | |
586 | ||
587 | my $db = tie %hash, 'SDBM_File', ... | |
588 | ||
589 | $db->Filter_Key_Push('int32') ; | |
590 | ||
591 | $db->Filter_Value_Push('utf8'); | |
592 | $db->Filter_Value_Push('null'); | |
593 | ||
594 | =head1 SEE ALSO | |
595 | ||
596 | <DB_File>, L<GDBM_File>, L<NDBM_File>, L<ODBM_File>, L<SDBM_File>, L<perldbmfilter> | |
597 | ||
598 | =head1 AUTHOR | |
599 | ||
600 | Paul Marquess <pmqs@cpan.org> | |
601 |