| 1 | |
| 2 | package Memoize::Expire; |
| 3 | # require 5.00556; |
| 4 | use Carp; |
| 5 | $DEBUG = 0; |
| 6 | $VERSION = '1.00'; |
| 7 | |
| 8 | # This package will implement expiration by prepending a fixed-length header |
| 9 | # to the font of the cached data. The format of the header will be: |
| 10 | # (4-byte number of last-access-time) (For LRU when I implement it) |
| 11 | # (4-byte expiration time: unsigned seconds-since-unix-epoch) |
| 12 | # (2-byte number-of-uses-before-expire) |
| 13 | |
| 14 | sub _header_fmt () { "N N n" } |
| 15 | sub _header_size () { length(_header_fmt) } |
| 16 | |
| 17 | # Usage: memoize func |
| 18 | # TIE => [Memoize::Expire, LIFETIME => sec, NUM_USES => n, |
| 19 | # TIE => [...] ] |
| 20 | |
| 21 | BEGIN { |
| 22 | eval {require Time::HiRes}; |
| 23 | unless ($@) { |
| 24 | Time::HiRes->import('time'); |
| 25 | } |
| 26 | } |
| 27 | |
| 28 | sub TIEHASH { |
| 29 | my ($package, %args) = @_; |
| 30 | my %cache; |
| 31 | if ($args{TIE}) { |
| 32 | my ($module, @opts) = @{$args{TIE}}; |
| 33 | my $modulefile = $module . '.pm'; |
| 34 | $modulefile =~ s{::}{/}g; |
| 35 | eval { require $modulefile }; |
| 36 | if ($@) { |
| 37 | croak "Memoize::Expire: Couldn't load hash tie module `$module': $@; aborting"; |
| 38 | } |
| 39 | my $rc = (tie %cache => $module, @opts); |
| 40 | unless ($rc) { |
| 41 | croak "Memoize::Expire: Couldn't tie hash to `$module': $@; aborting"; |
| 42 | } |
| 43 | } |
| 44 | $args{LIFETIME} ||= 0; |
| 45 | $args{NUM_USES} ||= 0; |
| 46 | $args{C} = \%cache; |
| 47 | bless \%args => $package; |
| 48 | } |
| 49 | |
| 50 | sub STORE { |
| 51 | $DEBUG and print STDERR " >> Store $_[1] $_[2]\n"; |
| 52 | my ($self, $key, $value) = @_; |
| 53 | my $expire_time = $self->{LIFETIME} > 0 ? $self->{LIFETIME} + time : 0; |
| 54 | # The call that results in a value to store into the cache is the |
| 55 | # first of the NUM_USES allowed calls. |
| 56 | my $header = _make_header(time, $expire_time, $self->{NUM_USES}-1); |
| 57 | $self->{C}{$key} = $header . $value; |
| 58 | $value; |
| 59 | } |
| 60 | |
| 61 | sub FETCH { |
| 62 | $DEBUG and print STDERR " >> Fetch cached value for $_[1]\n"; |
| 63 | my ($data, $last_access, $expire_time, $num_uses_left) = _get_item($_[0]{C}{$_[1]}); |
| 64 | $DEBUG and print STDERR " >> (ttl: ", ($expire_time-time()), ", nuses: $num_uses_left)\n"; |
| 65 | $num_uses_left--; |
| 66 | $last_access = time; |
| 67 | _set_header(@_, $data, $last_access, $expire_time, $num_uses_left); |
| 68 | $data; |
| 69 | } |
| 70 | |
| 71 | sub EXISTS { |
| 72 | $DEBUG and print STDERR " >> Exists $_[1]\n"; |
| 73 | unless (exists $_[0]{C}{$_[1]}) { |
| 74 | $DEBUG and print STDERR " Not in underlying hash at all.\n"; |
| 75 | return 0; |
| 76 | } |
| 77 | my $item = $_[0]{C}{$_[1]}; |
| 78 | my ($last_access, $expire_time, $num_uses_left) = _get_header($item); |
| 79 | my $ttl = $expire_time - time; |
| 80 | if ($DEBUG) { |
| 81 | $_[0]{LIFETIME} and print STDERR " Time to live for this item: $ttl\n"; |
| 82 | $_[0]{NUM_USES} and print STDERR " Uses remaining: $num_uses_left\n"; |
| 83 | } |
| 84 | if ( (! $_[0]{LIFETIME} || $expire_time > time) |
| 85 | && (! $_[0]{NUM_USES} || $num_uses_left > 0 )) { |
| 86 | $DEBUG and print STDERR " (Still good)\n"; |
| 87 | return 1; |
| 88 | } else { |
| 89 | $DEBUG and print STDERR " (Expired)\n"; |
| 90 | return 0; |
| 91 | } |
| 92 | } |
| 93 | |
| 94 | # Arguments: last access time, expire time, number of uses remaining |
| 95 | sub _make_header { |
| 96 | pack "N N n", @_; |
| 97 | } |
| 98 | |
| 99 | sub _strip_header { |
| 100 | substr($_[0], 10); |
| 101 | } |
| 102 | |
| 103 | # Arguments: last access time, expire time, number of uses remaining |
| 104 | sub _set_header { |
| 105 | my ($self, $key, $data, @header) = @_; |
| 106 | $self->{C}{$key} = _make_header(@header) . $data; |
| 107 | } |
| 108 | |
| 109 | sub _get_item { |
| 110 | my $data = substr($_[0], 10); |
| 111 | my @header = unpack "N N n", substr($_[0], 0, 10); |
| 112 | # print STDERR " >> _get_item: $data => $data @header\n"; |
| 113 | ($data, @header); |
| 114 | } |
| 115 | |
| 116 | # Return last access time, expire time, number of uses remaining |
| 117 | sub _get_header { |
| 118 | unpack "N N n", substr($_[0], 0, 10); |
| 119 | } |
| 120 | |
| 121 | 1; |
| 122 | |
| 123 | =head1 NAME |
| 124 | |
| 125 | Memoize::Expire - Plug-in module for automatic expiration of memoized values |
| 126 | |
| 127 | =head1 SYNOPSIS |
| 128 | |
| 129 | use Memoize; |
| 130 | use Memoize::Expire; |
| 131 | tie my %cache => 'Memoize::Expire', |
| 132 | LIFETIME => $lifetime, # In seconds |
| 133 | NUM_USES => $n_uses; |
| 134 | |
| 135 | memoize 'function', SCALAR_CACHE => [HASH => \%cache ]; |
| 136 | |
| 137 | =head1 DESCRIPTION |
| 138 | |
| 139 | Memoize::Expire is a plug-in module for Memoize. It allows the cached |
| 140 | values for memoized functions to expire automatically. This manual |
| 141 | assumes you are already familiar with the Memoize module. If not, you |
| 142 | should study that manual carefully first, paying particular attention |
| 143 | to the HASH feature. |
| 144 | |
| 145 | Memoize::Expire is a layer of software that you can insert in between |
| 146 | Memoize itself and whatever underlying package implements the cache. |
| 147 | The layer presents a hash variable whose values expire whenever they |
| 148 | get too old, have been used too often, or both. You tell C<Memoize> to |
| 149 | use this forgetful hash as its cache instead of the default, which is |
| 150 | an ordinary hash. |
| 151 | |
| 152 | To specify a real-time timeout, supply the C<LIFETIME> option with a |
| 153 | numeric value. Cached data will expire after this many seconds, and |
| 154 | will be looked up afresh when it expires. When a data item is looked |
| 155 | up afresh, its lifetime is reset. |
| 156 | |
| 157 | If you specify C<NUM_USES> with an argument of I<n>, then each cached |
| 158 | data item will be discarded and looked up afresh after the I<n>th time |
| 159 | you access it. When a data item is looked up afresh, its number of |
| 160 | uses is reset. |
| 161 | |
| 162 | If you specify both arguments, data will be discarded from the cache |
| 163 | when either expiration condition holds. |
| 164 | |
| 165 | Memoize::Expire uses a real hash internally to store the cached data. |
| 166 | You can use the C<HASH> option to Memoize::Expire to supply a tied |
| 167 | hash in place of the ordinary hash that Memoize::Expire will normally |
| 168 | use. You can use this feature to add Memoize::Expire as a layer in |
| 169 | between a persistent disk hash and Memoize. If you do this, you get a |
| 170 | persistent disk cache whose entries expire automatically. For |
| 171 | example: |
| 172 | |
| 173 | # Memoize |
| 174 | # | |
| 175 | # Memoize::Expire enforces data expiration policy |
| 176 | # | |
| 177 | # DB_File implements persistence of data in a disk file |
| 178 | # | |
| 179 | # Disk file |
| 180 | |
| 181 | use Memoize; |
| 182 | use Memoize::Expire; |
| 183 | use DB_File; |
| 184 | |
| 185 | # Set up persistence |
| 186 | tie my %disk_cache => 'DB_File', $filename, O_CREAT|O_RDWR, 0666]; |
| 187 | |
| 188 | # Set up expiration policy, supplying persistent hash as a target |
| 189 | tie my %cache => 'Memoize::Expire', |
| 190 | LIFETIME => $lifetime, # In seconds |
| 191 | NUM_USES => $n_uses, |
| 192 | HASH => \%disk_cache; |
| 193 | |
| 194 | # Set up memoization, supplying expiring persistent hash for cache |
| 195 | memoize 'function', SCALAR_CACHE => [ HASH => \%cache ]; |
| 196 | |
| 197 | =head1 INTERFACE |
| 198 | |
| 199 | There is nothing special about Memoize::Expire. It is just an |
| 200 | example. If you don't like the policy that it implements, you are |
| 201 | free to write your own expiration policy module that implements |
| 202 | whatever policy you desire. Here is how to do that. Let us suppose |
| 203 | that your module will be named MyExpirePolicy. |
| 204 | |
| 205 | Short summary: You need to create a package that defines four methods: |
| 206 | |
| 207 | =over 4 |
| 208 | |
| 209 | =item |
| 210 | TIEHASH |
| 211 | |
| 212 | Construct and return cache object. |
| 213 | |
| 214 | =item |
| 215 | EXISTS |
| 216 | |
| 217 | Given a function argument, is the corresponding function value in the |
| 218 | cache, and if so, is it fresh enough to use? |
| 219 | |
| 220 | =item |
| 221 | FETCH |
| 222 | |
| 223 | Given a function argument, look up the corresponding function value in |
| 224 | the cache and return it. |
| 225 | |
| 226 | =item |
| 227 | STORE |
| 228 | |
| 229 | Given a function argument and the corresponding function value, store |
| 230 | them into the cache. |
| 231 | |
| 232 | =item |
| 233 | CLEAR |
| 234 | |
| 235 | (Optional.) Flush the cache completely. |
| 236 | |
| 237 | =back |
| 238 | |
| 239 | The user who wants the memoization cache to be expired according to |
| 240 | your policy will say so by writing |
| 241 | |
| 242 | tie my %cache => 'MyExpirePolicy', args...; |
| 243 | memoize 'function', SCALAR_CACHE => [HASH => \%cache]; |
| 244 | |
| 245 | This will invoke C<< MyExpirePolicy->TIEHASH(args) >>. |
| 246 | MyExpirePolicy::TIEHASH should do whatever is appropriate to set up |
| 247 | the cache, and it should return the cache object to the caller. |
| 248 | |
| 249 | For example, MyExpirePolicy::TIEHASH might create an object that |
| 250 | contains a regular Perl hash (which it will to store the cached |
| 251 | values) and some extra information about the arguments and how old the |
| 252 | data is and things like that. Let us call this object `C'. |
| 253 | |
| 254 | When Memoize needs to check to see if an entry is in the cache |
| 255 | already, it will invoke C<< C->EXISTS(key) >>. C<key> is the normalized |
| 256 | function argument. MyExpirePolicy::EXISTS should return 0 if the key |
| 257 | is not in the cache, or if it has expired, and 1 if an unexpired value |
| 258 | is in the cache. It should I<not> return C<undef>, because there is a |
| 259 | bug in some versions of Perl that will cause a spurious FETCH if the |
| 260 | EXISTS method returns C<undef>. |
| 261 | |
| 262 | If your EXISTS function returns true, Memoize will try to fetch the |
| 263 | cached value by invoking C<< C->FETCH(key) >>. MyExpirePolicy::FETCH should |
| 264 | return the cached value. Otherwise, Memoize will call the memoized |
| 265 | function to compute the appropriate value, and will store it into the |
| 266 | cache by calling C<< C->STORE(key, value) >>. |
| 267 | |
| 268 | Here is a very brief example of a policy module that expires each |
| 269 | cache item after ten seconds. |
| 270 | |
| 271 | package Memoize::TenSecondExpire; |
| 272 | |
| 273 | sub TIEHASH { |
| 274 | my ($package, %args) = @_; |
| 275 | my $cache = $args{HASH} || {}; |
| 276 | bless $cache => $package; |
| 277 | } |
| 278 | |
| 279 | sub EXISTS { |
| 280 | my ($cache, $key) = @_; |
| 281 | if (exists $cache->{$key} && |
| 282 | $cache->{$key}{EXPIRE_TIME} > time) { |
| 283 | return 1 |
| 284 | } else { |
| 285 | return 0; # Do NOT return `undef' here. |
| 286 | } |
| 287 | } |
| 288 | |
| 289 | sub FETCH { |
| 290 | my ($cache, $key) = @_; |
| 291 | return $cache->{$key}{VALUE}; |
| 292 | } |
| 293 | |
| 294 | sub STORE { |
| 295 | my ($cache, $key, $newvalue) = @_; |
| 296 | $cache->{$key}{VALUE} = $newvalue; |
| 297 | $cache->{$key}{EXPIRE_TIME} = time + 10; |
| 298 | } |
| 299 | |
| 300 | To use this expiration policy, the user would say |
| 301 | |
| 302 | use Memoize; |
| 303 | tie my %cache10sec => 'Memoize::TenSecondExpire'; |
| 304 | memoize 'function', SCALAR_CACHE => [HASH => \%cache10sec]; |
| 305 | |
| 306 | Memoize would then call C<function> whenever a cached value was |
| 307 | entirely absent or was older than ten seconds. |
| 308 | |
| 309 | You should always support a C<HASH> argument to C<TIEHASH> that ties |
| 310 | the underlying cache so that the user can specify that the cache is |
| 311 | also persistent or that it has some other interesting semantics. The |
| 312 | example above demonstrates how to do this, as does C<Memoize::Expire>. |
| 313 | |
| 314 | =head1 ALTERNATIVES |
| 315 | |
| 316 | Brent Powers has a C<Memoize::ExpireLRU> module that was designed to |
| 317 | work with Memoize and provides expiration of least-recently-used data. |
| 318 | The cache is held at a fixed number of entries, and when new data |
| 319 | comes in, the least-recently used data is expired. See |
| 320 | L<http://search.cpan.org/search?mode=module&query=ExpireLRU>. |
| 321 | |
| 322 | Joshua Chamas's Tie::Cache module may be useful as an expiration |
| 323 | manager. (If you try this, let me know how it works out.) |
| 324 | |
| 325 | If you develop any useful expiration managers that you think should be |
| 326 | distributed with Memoize, please let me know. |
| 327 | |
| 328 | =head1 CAVEATS |
| 329 | |
| 330 | This module is experimental, and may contain bugs. Please report bugs |
| 331 | to the address below. |
| 332 | |
| 333 | Number-of-uses is stored as a 16-bit unsigned integer, so can't exceed |
| 334 | 65535. |
| 335 | |
| 336 | Because of clock granularity, expiration times may occur up to one |
| 337 | second sooner than you expect. For example, suppose you store a value |
| 338 | with a lifetime of ten seconds, and you store it at 12:00:00.998 on a |
| 339 | certain day. Memoize will look at the clock and see 12:00:00. Then |
| 340 | 9.01 seconds later, at 12:00:10.008 you try to read it back. Memoize |
| 341 | will look at the clock and see 12:00:10 and conclude that the value |
| 342 | has expired. This will probably not occur if you have |
| 343 | C<Time::HiRes> installed. |
| 344 | |
| 345 | =head1 AUTHOR |
| 346 | |
| 347 | Mark-Jason Dominus (mjd-perl-memoize+@plover.com) |
| 348 | |
| 349 | Mike Cariaso provided valuable insight into the best way to solve this |
| 350 | problem. |
| 351 | |
| 352 | =head1 SEE ALSO |
| 353 | |
| 354 | perl(1) |
| 355 | |
| 356 | The Memoize man page. |
| 357 | |
| 358 | http://www.plover.com/~mjd/perl/Memoize/ (for news and updates) |
| 359 | |
| 360 | I maintain a mailing list on which I occasionally announce new |
| 361 | versions of Memoize. The list is for announcements only, not |
| 362 | discussion. To join, send an empty message to |
| 363 | mjd-perl-memoize-request@Plover.com. |
| 364 | |
| 365 | =cut |