Commit | Line | Data |
---|---|---|
1e73acc8 AS |
1 | package Hash::Util::FieldHash; |
2 | ||
3 | use 5.009004; | |
4 | use strict; | |
5 | use warnings; | |
1e73acc8 AS |
6 | use Scalar::Util qw( reftype); |
7 | ||
25694085 | 8 | our $VERSION = '1.12'; |
3ae03d21 | 9 | |
1e73acc8 AS |
10 | require Exporter; |
11 | our @ISA = qw(Exporter); | |
12 | our %EXPORT_TAGS = ( | |
13 | 'all' => [ qw( | |
14 | fieldhash | |
15 | fieldhashes | |
d74d639b AS |
16 | idhash |
17 | idhashes | |
18 | id | |
19 | id_2obj | |
20 | register | |
1e73acc8 AS |
21 | )], |
22 | ); | |
23 | our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); | |
1e73acc8 | 24 | |
1e73acc8 AS |
25 | { |
26 | require XSLoader; | |
6ff38c27 AS |
27 | my %ob_reg; # private object registry |
28 | sub _ob_reg { \ %ob_reg } | |
da4061d3 | 29 | XSLoader::load(); |
1e73acc8 AS |
30 | } |
31 | ||
32 | sub fieldhash (\%) { | |
33 | for ( shift ) { | |
34 | return unless ref() && reftype( $_) eq 'HASH'; | |
35 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0); | |
d74d639b AS |
36 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 2) == 2; |
37 | return; | |
38 | } | |
39 | } | |
40 | ||
41 | sub idhash (\%) { | |
42 | for ( shift ) { | |
43 | return unless ref() && reftype( $_) eq 'HASH'; | |
44 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0); | |
45 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 1) == 1; | |
1e73acc8 AS |
46 | return; |
47 | } | |
48 | } | |
49 | ||
50 | sub fieldhashes { map &fieldhash( $_), @_ } | |
d74d639b | 51 | sub idhashes { map &idhash( $_), @_ } |
1e73acc8 AS |
52 | |
53 | 1; | |
54 | __END__ | |
55 | ||
56 | =head1 NAME | |
57 | ||
d74d639b | 58 | Hash::Util::FieldHash - Support for Inside-Out Classes |
1e73acc8 | 59 | |
b299c47c | 60 | =head1 SYNOPSIS |
1e73acc8 | 61 | |
d74d639b | 62 | ### Create fieldhashes |
1e73acc8 | 63 | use Hash::Util qw(fieldhash fieldhashes); |
6ff38c27 | 64 | |
1e73acc8 AS |
65 | # Create a single field hash |
66 | fieldhash my %foo; | |
6ff38c27 | 67 | |
1e73acc8 AS |
68 | # Create three at once... |
69 | fieldhashes \ my(%foo, %bar, %baz); | |
70 | # ...or any number | |
71 | fieldhashes @hashrefs; | |
72 | ||
d74d639b | 73 | ### Create an idhash and register it for garbage collection |
ada3dff8 | 74 | use Hash::Util::FieldHash qw(idhash register); |
d74d639b | 75 | idhash my %name; |
ada3dff8 | 76 | my $object = \ do { my $o }; |
d74d639b | 77 | # register the idhash for garbage collection with $object |
ada3dff8 RGS |
78 | register($object, \ %name); |
79 | # the following entry will be deleted when $object goes out of scope | |
80 | $name{$object} = 'John Doe'; | |
b299c47c | 81 | |
d74d639b | 82 | ### Register an ordinary hash for garbage collection |
ada3dff8 | 83 | use Hash::Util::FieldHash qw(id register); |
d74d639b | 84 | my %name; |
ada3dff8 RGS |
85 | my $object = \ do { my $o }; |
86 | # register the hash %name for garbage collection of $object's id | |
87 | register $object, \ %name; | |
88 | # the following entry will be deleted when $object goes out of scope | |
89 | $name{id $object} = 'John Doe'; | |
d74d639b | 90 | |
b299c47c | 91 | =head1 FUNCTIONS |
1e73acc8 | 92 | |
d74d639b | 93 | C<Hash::Util::FieldHash> offers a number of functions in support of |
b299c47c | 94 | L<The Inside-out Technique> of class construction. |
1e73acc8 AS |
95 | |
96 | =over | |
97 | ||
d74d639b | 98 | =item id |
1e73acc8 | 99 | |
d74d639b | 100 | id($obj) |
1e73acc8 | 101 | |
d74d639b AS |
102 | Returns the reference address of a reference $obj. If $obj is |
103 | not a reference, returns $obj. | |
1e73acc8 | 104 | |
d74d639b | 105 | This function is a stand-in replacement for |
69895b01 | 106 | L<Scalar::Util::refaddr|Scalar::Util/refaddr EXPR>, that is, it returns |
d74d639b AS |
107 | the reference address of its argument as a numeric value. The only |
108 | difference is that C<refaddr()> returns C<undef> when given a | |
109 | non-reference while C<id()> returns its argument unchanged. | |
1e73acc8 | 110 | |
d74d639b AS |
111 | C<id()> also uses a caching technique that makes it faster when |
112 | the id of an object is requested often, but slower if it is needed | |
113 | only once or twice. | |
1e73acc8 | 114 | |
d74d639b | 115 | =item id_2obj |
1e73acc8 | 116 | |
d74d639b | 117 | $obj = id_2obj($id) |
1e73acc8 | 118 | |
d74d639b AS |
119 | If C<$id> is the id of a registered object (see L</register>), returns |
120 | the object, otherwise an undefined value. For registered objects this | |
121 | is the inverse function of C<id()>. | |
1e73acc8 | 122 | |
d74d639b | 123 | =item register |
1e73acc8 | 124 | |
d74d639b AS |
125 | register($obj) |
126 | register($obj, @hashrefs) | |
1e73acc8 | 127 | |
d74d639b AS |
128 | In the first form, registers an object to work with for the function |
129 | C<id_2obj()>. In the second form, it additionally marks the given | |
130 | hashrefs down for garbage collection. This means that when the object | |
131 | goes out of scope, any entries in the given hashes under the key of | |
132 | C<id($obj)> will be deleted from the hashes. | |
1e73acc8 | 133 | |
d74d639b AS |
134 | It is a fatal error to register a non-reference $obj. Any non-hashrefs |
135 | among the following arguments are silently ignored. | |
1e73acc8 | 136 | |
d74d639b AS |
137 | It is I<not> an error to register the same object multiple times with |
138 | varying sets of hashrefs. Any hashrefs that are not registered yet | |
139 | will be added, others ignored. | |
1e73acc8 | 140 | |
d74d639b AS |
141 | Registry also implies thread support. When a new thread is created, |
142 | all references are replaced with new ones, including all objects. | |
143 | If a hash uses the reference address of an object as a key, that | |
144 | connection would be broken. With a registered object, its id will | |
145 | be updated in all hashes registered with it. | |
1e73acc8 | 146 | |
d74d639b | 147 | =item idhash |
1e73acc8 | 148 | |
d74d639b | 149 | idhash my %hash |
1e73acc8 | 150 | |
d74d639b | 151 | Makes an idhash from the argument, which must be a hash. |
1e73acc8 | 152 | |
d74d639b AS |
153 | An I<idhash> works like a normal hash, except that it stringifies a |
154 | I<reference used as a key> differently. A reference is stringified | |
155 | as if the C<id()> function had been invoked on it, that is, its | |
156 | reference address in decimal is used as the key. | |
1e73acc8 | 157 | |
d74d639b | 158 | =item idhashes |
1e73acc8 | 159 | |
ada3dff8 | 160 | idhashes \ my(%hash, %gnash, %trash) |
d74d639b | 161 | idhashes \ @hashrefs |
1e73acc8 | 162 | |
d74d639b AS |
163 | Creates many idhashes from its hashref arguments. Returns those |
164 | arguments that could be converted or their number in scalar context. | |
1e73acc8 | 165 | |
d74d639b | 166 | =item fieldhash |
1e73acc8 | 167 | |
d74d639b | 168 | fieldhash %hash; |
1e73acc8 | 169 | |
d74d639b AS |
170 | Creates a single fieldhash. The argument must be a hash. Returns |
171 | a reference to the given hash if successful, otherwise nothing. | |
1e73acc8 | 172 | |
d74d639b AS |
173 | A I<fieldhash> is, in short, an idhash with auto-registry. When an |
174 | object (or, indeed, any reference) is used as a fieldhash key, the | |
175 | fieldhash is automatically registered for garbage collection with | |
176 | the object, as if C<register $obj, \ %fieldhash> had been called. | |
1e73acc8 | 177 | |
d74d639b | 178 | =item fieldhashes |
1e73acc8 | 179 | |
d74d639b | 180 | fieldhashes @hashrefs; |
1e73acc8 | 181 | |
d74d639b AS |
182 | Creates any number of field hashes. Arguments must be hash references. |
183 | Returns the converted hashrefs in list context, their number in scalar | |
184 | context. | |
1e73acc8 AS |
185 | |
186 | =back | |
187 | ||
b299c47c | 188 | =head1 DESCRIPTION |
1e73acc8 | 189 | |
d74d639b AS |
190 | A word on terminology: I shall use the term I<field> for a scalar |
191 | piece of data that a class associates with an object. Other terms that | |
192 | have been used for this concept are "object variable", "(object) property", | |
ada3dff8 | 193 | "(object) attribute" and more. Especially "attribute" has some currency |
d74d639b AS |
194 | among Perl programmer, but that clashes with the C<attributes> pragma. The |
195 | term "field" also has some currency in this sense and doesn't seem | |
196 | to conflict with other Perl terminology. | |
197 | ||
198 | In Perl, an object is a blessed reference. The standard way of associating | |
b7ffd429 | 199 | data with an object is to store the data inside the object's body, that is, |
d74d639b AS |
200 | the piece of data pointed to by the reference. |
201 | ||
202 | In consequence, if two or more classes want to access an object they | |
ada3dff8 | 203 | I<must> agree on the type of reference and also on the organization of |
d74d639b AS |
204 | data within the object body. Failure to agree on the type results in |
205 | immediate death when the wrong method tries to access an object. Failure | |
206 | to agree on data organization may lead to one class trampling over the | |
207 | data of another. | |
208 | ||
209 | This object model leads to a tight coupling between subclasses. | |
210 | If one class wants to inherit from another (and both classes access | |
211 | object data), the classes must agree about implementation details. | |
212 | Inheritance can only be used among classes that are maintained together, | |
213 | in a single source or not. | |
214 | ||
215 | In particular, it is not possible to write general-purpose classes | |
216 | in this technique, classes that can advertise themselves as "Put me | |
217 | on your @ISA list and use my methods". If the other class has different | |
218 | ideas about how the object body is used, there is trouble. | |
219 | ||
69895b01 | 220 | For reference C<Name_hash> in L</Example 1> shows the standard implementation of |
d74d639b AS |
221 | a simple class C<Name> in the well-known hash based way. It also demonstrates |
222 | the predictable failure to construct a common subclass C<NamedFile> | |
223 | of C<Name> and the class C<IO::File> (whose objects I<must> be globrefs). | |
224 | ||
225 | Thus, techniques are of interest that store object data I<not> in | |
226 | the object body but some other place. | |
227 | ||
228 | =head2 The Inside-out Technique | |
229 | ||
230 | With I<inside-out> classes, each class declares a (typically lexical) | |
231 | hash for each field it wants to use. The reference address of an | |
232 | object is used as the hash key. By definition, the reference address | |
233 | is unique to each object so this guarantees a place for each field that | |
69895b01 KW |
234 | is private to the class and unique to each object. See C<Name_id> |
235 | in L</Example 1> for a simple example. | |
d74d639b AS |
236 | |
237 | In comparison to the standard implementation where the object is a | |
238 | hash and the fields correspond to hash keys, here the fields correspond | |
239 | to hashes, and the object determines the hash key. Thus the hashes | |
240 | appear to be turned I<inside out>. | |
241 | ||
242 | The body of an object is never examined by an inside-out class, only | |
243 | its reference address is used. This allows for the body of an actual | |
244 | object to be I<anything at all> while the object methods of the class | |
245 | still work as designed. This is a key feature of inside-out classes. | |
246 | ||
247 | =head2 Problems of Inside-out | |
248 | ||
249 | Inside-out classes give us freedom of inheritance, but as usual there | |
250 | is a price. | |
251 | ||
252 | Most obviously, there is the necessity of retrieving the reference | |
253 | address of an object for each data access. It's a minor inconvenience, | |
254 | but it does clutter the code. | |
255 | ||
256 | More important (and less obvious) is the necessity of garbage | |
257 | collection. When a normal object dies, anything stored in the | |
258 | object body is garbage-collected by perl. With inside-out objects, | |
259 | Perl knows nothing about the data stored in field hashes by a class, | |
260 | but these must be deleted when the object goes out of scope. Thus | |
261 | the class must provide a C<DESTROY> method to take care of that. | |
262 | ||
263 | In the presence of multiple classes it can be non-trivial | |
264 | to make sure that every relevant destructor is called for | |
265 | every object. Perl calls the first one it finds on the | |
266 | inheritance tree (if any) and that's it. | |
267 | ||
268 | A related issue is thread-safety. When a new thread is created, | |
269 | the Perl interpreter is cloned, which implies that all reference | |
270 | addresses in use will be replaced with new ones. Thus, if a class | |
271 | tries to access a field of a cloned object its (cloned) data will | |
272 | still be stored under the now invalid reference address of the | |
273 | original in the parent thread. A general C<CLONE> method must | |
274 | be provided to re-establish the association. | |
275 | ||
276 | =head2 Solutions | |
277 | ||
278 | C<Hash::Util::FieldHash> addresses these issues on several | |
279 | levels. | |
280 | ||
281 | The C<id()> function is provided in addition to the | |
282 | existing C<Scalar::Util::refaddr()>. Besides its short name | |
283 | it can be a little faster under some circumstances (and a | |
284 | bit slower under others). Benchmark if it matters. The | |
285 | working of C<id()> also allows the use of the class name | |
b299c47c | 286 | as a I<generic object> as described L<further down|/"The Generic Object">. |
d74d639b AS |
287 | |
288 | The C<id()> function is incorporated in I<id hashes> in the sense | |
289 | that it is called automatically on every key that is used with | |
290 | the hash. No explicit call is necessary. | |
291 | ||
292 | The problems of garbage collection and thread safety are both | |
293 | addressed by the function C<register()>. It registers an object | |
294 | together with any number of hashes. Registry means that when the | |
295 | object dies, an entry in any of the hashes under the reference | |
296 | address of this object will be deleted. This guarantees garbage | |
297 | collection in these hashes. It also means that on thread | |
298 | cloning the object's entries in registered hashes will be | |
299 | replaced with updated entries whose key is the cloned object's | |
300 | reference address. Thus the object-data association becomes | |
301 | thread-safe. | |
302 | ||
303 | Object registry is best done when the object is initialized | |
304 | for use with a class. That way, garbage collection and thread | |
305 | safety are established for every object and every field that is | |
306 | initialized. | |
307 | ||
308 | Finally, I<field hashes> incorporate all these functions in one | |
309 | package. Besides automatically calling the C<id()> function | |
310 | on every object used as a key, the object is registered with | |
311 | the field hash on first use. Classes based on field hashes | |
312 | are fully garbage-collected and thread safe without further | |
313 | measures. | |
314 | ||
315 | =head2 More Problems | |
316 | ||
317 | Another problem that occurs with inside-out classes is serialization. | |
318 | Since the object data is not in its usual place, standard routines | |
319 | like C<Storable::freeze()>, C<Storable::thaw()> and | |
320 | C<Data::Dumper::Dumper()> can't deal with it on their own. Both | |
321 | C<Data::Dumper> and C<Storable> provide the necessary hooks to | |
322 | make things work, but the functions or methods used by the hooks | |
323 | must be provided by each inside-out class. | |
324 | ||
325 | A general solution to the serialization problem would require another | |
92a34c45 | 326 | level of registry, one that associates I<classes> and fields. |
b7ffd429 | 327 | So far, the functions of C<Hash::Util::FieldHash> are unaware of |
d74d639b AS |
328 | any classes, which I consider a feature. Therefore C<Hash::Util::FieldHash> |
329 | doesn't address the serialization problems. | |
330 | ||
331 | =head2 The Generic Object | |
332 | ||
333 | Classes based on the C<id()> function (and hence classes based on | |
334 | C<idhash()> and C<fieldhash()>) show a peculiar behavior in that | |
335 | the class name can be used like an object. Specifically, methods | |
336 | that set or read data associated with an object continue to work as | |
337 | class methods, just as if the class name were an object, distinct from | |
338 | all other objects, with its own data. This object may be called | |
339 | the I<generic object> of the class. | |
340 | ||
ada3dff8 | 341 | This works because field hashes respond to keys that are not references |
d74d639b AS |
342 | like a normal hash would and use the string offered as the hash key. |
343 | Thus, if a method is called as a class method, the field hash is presented | |
344 | with the class name instead of an object and blithely uses it as a key. | |
345 | Since the keys of real objects are decimal numbers, there is no | |
346 | conflict and the slot in the field hash can be used like any other. | |
347 | The C<id()> function behaves correspondingly with respect to non-reference | |
348 | arguments. | |
349 | ||
350 | Two possible uses (besides ignoring the property) come to mind. | |
351 | A singleton class could be implemented this using the generic object. | |
352 | If necessary, an C<init()> method could die or ignore calls with | |
353 | actual objects (references), so only the generic object will ever exist. | |
354 | ||
355 | Another use of the generic object would be as a template. It is | |
356 | a convenient place to store class-specific defaults for various | |
357 | fields to be used in actual object initialization. | |
358 | ||
359 | Usually, the feature can be entirely ignored. Calling I<object | |
360 | methods> as I<class methods> normally leads to an error and isn't used | |
361 | routinely anywhere. It may be a problem that this error isn't | |
362 | indicated by a class with a generic object. | |
363 | ||
364 | =head2 How to use Field Hashes | |
1e73acc8 AS |
365 | |
366 | Traditionally, the definition of an inside-out class contains a bare | |
367 | block inside which a number of lexical hashes are declared and the | |
368 | basic accessor methods defined, usually through C<Scalar::Util::refaddr>. | |
369 | Further methods may be defined outside this block. There has to be | |
370 | a DESTROY method and, for thread support, a CLONE method. | |
371 | ||
ada3dff8 | 372 | When field hashes are used, the basic structure remains the same. |
1e73acc8 AS |
373 | Each lexical hash will be made a field hash. The call to C<refaddr> |
374 | can be omitted from the accessor methods. DESTROY and CLONE methods | |
375 | are not necessary. | |
376 | ||
377 | If you have an existing inside-out class, simply making all hashes | |
378 | field hashes with no other change should make no difference. Through | |
379 | the calls to C<refaddr> or equivalent, the field hashes never get to | |
380 | see a reference and work like normal hashes. Your DESTROY (and | |
381 | CLONE) methods are still needed. | |
382 | ||
383 | To make the field hashes kick in, it is easiest to redefine C<refaddr> | |
384 | as | |
385 | ||
386 | sub refaddr { shift } | |
387 | ||
388 | instead of importing it from C<Scalar::Util>. It should now be possible | |
389 | to disable DESTROY and CLONE. Note that while it isn't disabled, | |
390 | DESTROY will be called before the garbage collection of field hashes, | |
6ff38c27 AS |
391 | so it will be invoked with a functional object and will continue to |
392 | function. | |
393 | ||
394 | It is not desirable to import the functions C<fieldhash> and/or | |
395 | C<fieldhashes> into every class that is going to use them. They | |
396 | are only used once to set up the class. When the class is up and running, | |
397 | these functions serve no more purpose. | |
1e73acc8 | 398 | |
1e73acc8 AS |
399 | If there are only a few field hashes to declare, it is simplest to |
400 | ||
401 | use Hash::Util::FieldHash; | |
402 | ||
403 | early and call the functions qualified: | |
404 | ||
405 | Hash::Util::FieldHash::fieldhash my %foo; | |
406 | ||
407 | Otherwise, import the functions into a convenient package like | |
d74d639b | 408 | C<HUF> or, more general, C<Aux> |
1e73acc8 AS |
409 | |
410 | { | |
411 | package Aux; | |
412 | use Hash::Util::FieldHash ':all'; | |
413 | } | |
414 | ||
415 | and call | |
416 | ||
417 | Aux::fieldhash my %foo; | |
418 | ||
419 | as needed. | |
420 | ||
d74d639b AS |
421 | =head2 Garbage-Collected Hashes |
422 | ||
423 | Garbage collection in a field hash means that entries will "spontaneously" | |
424 | disappear when the object that created them disappears. That must be | |
425 | borne in mind, especially when looping over a field hash. If anything | |
426 | you do inside the loop could cause an object to go out of scope, a | |
427 | random key may be deleted from the hash you are looping over. That | |
428 | can throw the loop iterator, so it's best to cache a consistent snapshot | |
429 | of the keys and/or values and loop over that. You will still have to | |
430 | check that a cached entry still exists when you get to it. | |
431 | ||
432 | Garbage collection can be confusing when keys are created in a field hash | |
433 | from normal scalars as well as references. Once a reference is I<used> with | |
434 | a field hash, the entry will be collected, even if it was later overwritten | |
435 | with a plain scalar key (every positive integer is a candidate). This | |
436 | is true even if the original entry was deleted in the meantime. In fact, | |
437 | deletion from a field hash, and also a test for existence constitute | |
438 | I<use> in this sense and create a liability to delete the entry when | |
439 | the reference goes out of scope. If you happen to create an entry | |
440 | with an identical key from a string or integer, that will be collected | |
441 | instead. Thus, mixed use of references and plain scalars as field hash | |
442 | keys is not entirely supported. | |
443 | ||
b299c47c | 444 | =head1 EXAMPLES |
1e73acc8 | 445 | |
d74d639b AS |
446 | The examples show a very simple class that implements a I<name>, consisting |
447 | of a first and last name (no middle initial). The name class has four | |
448 | methods: | |
1e73acc8 | 449 | |
d74d639b | 450 | =over |
1e73acc8 | 451 | |
d74d639b | 452 | =item * C<init()> |
1e73acc8 | 453 | |
d74d639b AS |
454 | An object method that initializes the first and last name to its |
455 | two arguments. If called as a class method, C<init()> creates an | |
456 | object in the given class and initializes that. | |
1e73acc8 | 457 | |
d74d639b | 458 | =item * C<first()> |
1e73acc8 | 459 | |
d74d639b | 460 | Retrieve the first name |
1e73acc8 | 461 | |
d74d639b | 462 | =item * C<last()> |
1e73acc8 | 463 | |
d74d639b | 464 | Retrieve the last name |
1e73acc8 | 465 | |
d74d639b | 466 | =item * C<name()> |
1e73acc8 | 467 | |
d74d639b | 468 | Retrieve the full name, the first and last name joined by a blank. |
1e73acc8 | 469 | |
d74d639b | 470 | =back |
1e73acc8 | 471 | |
d74d639b AS |
472 | The examples show this class implemented with different levels of |
473 | support by C<Hash::Util::FieldHash>. All supported combinations | |
474 | are shown. The difference between implementations is often quite | |
475 | small. The implementations are: | |
1e73acc8 | 476 | |
d74d639b | 477 | =over |
1e73acc8 | 478 | |
d74d639b | 479 | =item * C<Name_hash> |
1e73acc8 | 480 | |
d74d639b AS |
481 | A conventional (not inside-out) implementation where an object is |
482 | a hash that stores the field values, without support by | |
483 | C<Hash::Util::FieldHash>. This implementation doesn't allow | |
484 | arbitrary inheritance. | |
485 | ||
486 | =item * C<Name_id> | |
487 | ||
488 | Inside-out implementation based on the C<id()> function. It needs | |
489 | a C<DESTROY> method. For thread support a C<CLONE> method (not shown) | |
490 | would also be needed. Instead of C<Hash::Util::FieldHash::id()> the | |
491 | function C<Scalar::Util::refaddr> could be used with very little | |
492 | functional difference. This is the basic pattern of an inside-out | |
493 | class. | |
494 | ||
495 | =item * C<Name_idhash> | |
496 | ||
69895b01 | 497 | Idhash-based inside-out implementation. Like C<Name_id> it needs |
d74d639b AS |
498 | a C<DESTROY> method and would need C<CLONE> for thread support. |
499 | ||
500 | =item * C<Name_id_reg> | |
501 | ||
502 | Inside-out implementation based on the C<id()> function with explicit | |
503 | object registry. No destructor is needed and objects are thread safe. | |
504 | ||
505 | =item * C<Name_idhash_reg> | |
506 | ||
507 | Idhash-based inside-out implementation with explicit object registry. | |
508 | No destructor is needed and objects are thread safe. | |
509 | ||
510 | =item * C<Name_fieldhash> | |
511 | ||
b7ffd429 | 512 | FieldHash-based inside-out implementation. Object registry happens |
d74d639b AS |
513 | automatically. No destructor is needed and objects are thread safe. |
514 | ||
515 | =back | |
516 | ||
517 | These examples are realized in the code below, which could be copied | |
518 | to a file F<Example.pm>. | |
519 | ||
520 | =head2 Example 1 | |
521 | ||
522 | use strict; use warnings; | |
523 | ||
524 | { | |
555bd962 BG |
525 | package Name_hash; # standard implementation: the |
526 | # object is a hash | |
d74d639b AS |
527 | sub init { |
528 | my $obj = shift; | |
ada3dff8 | 529 | my ($first, $last) = @_; |
d74d639b AS |
530 | # create an object if called as class method |
531 | $obj = bless {}, $obj unless ref $obj; | |
532 | $obj->{ first} = $first; | |
533 | $obj->{ last} = $last; | |
534 | $obj; | |
535 | } | |
536 | ||
537 | sub first { shift()->{ first} } | |
538 | sub last { shift()->{ last} } | |
539 | ||
540 | sub name { | |
541 | my $n = shift; | |
542 | join ' ' => $n->first, $n->last; | |
543 | } | |
6ff38c27 | 544 | |
1e73acc8 AS |
545 | } |
546 | ||
d74d639b AS |
547 | { |
548 | package Name_id; | |
ada3dff8 | 549 | use Hash::Util::FieldHash qw(id); |
d74d639b | 550 | |
ada3dff8 | 551 | my (%first, %last); |
d74d639b AS |
552 | |
553 | sub init { | |
554 | my $obj = shift; | |
ada3dff8 | 555 | my ($first, $last) = @_; |
d74d639b AS |
556 | # create an object if called as class method |
557 | $obj = bless \ my $o, $obj unless ref $obj; | |
558 | $first{ id $obj} = $first; | |
559 | $last{ id $obj} = $last; | |
560 | $obj; | |
561 | } | |
562 | ||
563 | sub first { $first{ id shift()} } | |
564 | sub last { $last{ id shift()} } | |
565 | ||
566 | sub name { | |
567 | my $n = shift; | |
568 | join ' ' => $n->first, $n->last; | |
569 | } | |
570 | ||
571 | sub DESTROY { | |
572 | my $id = id shift; | |
573 | delete $first{ $id}; | |
574 | delete $last{ $id}; | |
575 | } | |
1e73acc8 | 576 | |
1e73acc8 AS |
577 | } |
578 | ||
d74d639b AS |
579 | { |
580 | package Name_idhash; | |
581 | use Hash::Util::FieldHash; | |
582 | ||
ada3dff8 | 583 | Hash::Util::FieldHash::idhashes( \ my (%first, %last) ); |
d74d639b AS |
584 | |
585 | sub init { | |
586 | my $obj = shift; | |
ada3dff8 | 587 | my ($first, $last) = @_; |
d74d639b AS |
588 | # create an object if called as class method |
589 | $obj = bless \ my $o, $obj unless ref $obj; | |
590 | $first{ $obj} = $first; | |
591 | $last{ $obj} = $last; | |
592 | $obj; | |
593 | } | |
594 | ||
595 | sub first { $first{ shift()} } | |
596 | sub last { $last{ shift()} } | |
597 | ||
598 | sub name { | |
599 | my $n = shift; | |
600 | join ' ' => $n->first, $n->last; | |
601 | } | |
602 | ||
603 | sub DESTROY { | |
604 | my $n = shift; | |
605 | delete $first{ $n}; | |
606 | delete $last{ $n}; | |
607 | } | |
608 | ||
1e73acc8 AS |
609 | } |
610 | ||
d74d639b AS |
611 | { |
612 | package Name_id_reg; | |
ada3dff8 | 613 | use Hash::Util::FieldHash qw(id register); |
d74d639b | 614 | |
ada3dff8 | 615 | my (%first, %last); |
d74d639b AS |
616 | |
617 | sub init { | |
618 | my $obj = shift; | |
ada3dff8 | 619 | my ($first, $last) = @_; |
d74d639b AS |
620 | # create an object if called as class method |
621 | $obj = bless \ my $o, $obj unless ref $obj; | |
ada3dff8 | 622 | register( $obj, \ (%first, %last) ); |
d74d639b AS |
623 | $first{ id $obj} = $first; |
624 | $last{ id $obj} = $last; | |
625 | $obj; | |
626 | } | |
627 | ||
628 | sub first { $first{ id shift()} } | |
629 | sub last { $last{ id shift()} } | |
630 | ||
631 | sub name { | |
632 | my $n = shift; | |
633 | join ' ' => $n->first, $n->last; | |
634 | } | |
635 | } | |
636 | ||
637 | { | |
638 | package Name_idhash_reg; | |
ada3dff8 | 639 | use Hash::Util::FieldHash qw(register); |
d74d639b | 640 | |
ada3dff8 | 641 | Hash::Util::FieldHash::idhashes \ my (%first, %last); |
d74d639b AS |
642 | |
643 | sub init { | |
644 | my $obj = shift; | |
ada3dff8 | 645 | my ($first, $last) = @_; |
d74d639b AS |
646 | # create an object if called as class method |
647 | $obj = bless \ my $o, $obj unless ref $obj; | |
ada3dff8 | 648 | register( $obj, \ (%first, %last) ); |
d74d639b AS |
649 | $first{ $obj} = $first; |
650 | $last{ $obj} = $last; | |
651 | $obj; | |
652 | } | |
653 | ||
654 | sub first { $first{ shift()} } | |
655 | sub last { $last{ shift()} } | |
656 | ||
657 | sub name { | |
658 | my $n = shift; | |
659 | join ' ' => $n->first, $n->last; | |
660 | } | |
661 | } | |
662 | ||
663 | { | |
664 | package Name_fieldhash; | |
665 | use Hash::Util::FieldHash; | |
666 | ||
ada3dff8 | 667 | Hash::Util::FieldHash::fieldhashes \ my (%first, %last); |
d74d639b AS |
668 | |
669 | sub init { | |
670 | my $obj = shift; | |
ada3dff8 | 671 | my ($first, $last) = @_; |
d74d639b AS |
672 | # create an object if called as class method |
673 | $obj = bless \ my $o, $obj unless ref $obj; | |
674 | $first{ $obj} = $first; | |
675 | $last{ $obj} = $last; | |
676 | $obj; | |
677 | } | |
678 | ||
679 | sub first { $first{ shift()} } | |
680 | sub last { $last{ shift()} } | |
681 | ||
682 | sub name { | |
683 | my $n = shift; | |
684 | join ' ' => $n->first, $n->last; | |
685 | } | |
686 | } | |
687 | ||
688 | 1; | |
689 | ||
b299c47c | 690 | To exercise the various implementations the script L<below|/"Example 2"> can |
d74d639b AS |
691 | be used. |
692 | ||
693 | It sets up a class C<Name> that is a mirror of one of the implementation | |
694 | classes C<Name_hash>, C<Name_id>, ..., C<Name_fieldhash>. That determines | |
695 | which implementation is run. | |
696 | ||
697 | The script first verifies the function of the C<Name> class. | |
698 | ||
b7ffd429 | 699 | In the second step, the free inheritability of the implementation |
d74d639b AS |
700 | (or lack thereof) is demonstrated. For this purpose it constructs |
701 | a class called C<NamedFile> which is a common subclass of C<Name> and | |
702 | the standard class C<IO::File>. This puts inheritability to the test | |
703 | because objects of C<IO::File> I<must> be globrefs. Objects of C<NamedFile> | |
704 | should behave like a file opened for reading and also support the C<name()> | |
705 | method. This class juncture works with exception of the C<Name_hash> | |
706 | implementation, where object initialization fails because of the | |
707 | incompatibility of object bodies. | |
708 | ||
709 | =head2 Example 2 | |
710 | ||
711 | use strict; use warnings; $| = 1; | |
712 | ||
713 | use Example; | |
714 | ||
715 | { | |
716 | package Name; | |
555bd962 | 717 | use base 'Name_id'; # define here which implementation to run |
d74d639b AS |
718 | } |
719 | ||
720 | ||
721 | # Verify that the base package works | |
ada3dff8 | 722 | my $n = Name->init(qw(Albert Einstein)); |
d74d639b AS |
723 | print $n->name, "\n"; |
724 | print "\n"; | |
725 | ||
726 | # Create a named file handle (See definition below) | |
ada3dff8 | 727 | my $nf = NamedFile->init(qw(/tmp/x Filomena File)); |
d74d639b AS |
728 | # use as a file handle... |
729 | for ( 1 .. 3 ) { | |
730 | my $l = <$nf>; | |
731 | print "line $_: $l"; | |
732 | } | |
733 | # ...and as a Name object | |
734 | print "...brought to you by ", $nf->name, "\n"; | |
735 | exit; | |
736 | ||
737 | ||
738 | # Definition of NamedFile | |
739 | package NamedFile; | |
740 | use base 'Name'; | |
741 | use base 'IO::File'; | |
742 | ||
743 | sub init { | |
744 | my $obj = shift; | |
ada3dff8 | 745 | my ($file, $first, $last) = @_; |
d74d639b | 746 | $obj = $obj->IO::File::new() unless ref $obj; |
ada3dff8 RGS |
747 | $obj->open($file) or die "Can't read '$file': $!"; |
748 | $obj->Name::init($first, $last); | |
d74d639b AS |
749 | } |
750 | __END__ | |
1e73acc8 | 751 | |
1e73acc8 | 752 | |
b299c47c | 753 | =head1 GUTS |
1e73acc8 AS |
754 | |
755 | To make C<Hash::Util::FieldHash> work, there were two changes to | |
b7b1e41b | 756 | F<perl> itself. C<PERL_MAGIC_uvar> was made available for hashes, |
1e73acc8 AS |
757 | and weak references now call uvar C<get> magic after a weakref has been |
758 | cleared. The first feature is used to make field hashes intercept | |
759 | their keys upon access. The second one triggers garbage collection. | |
760 | ||
761 | =head2 The C<PERL_MAGIC_uvar> interface for hashes | |
762 | ||
763 | C<PERL_MAGIC_uvar> I<get> magic is called from C<hv_fetch_common> and | |
764 | C<hv_delete_common> through the function C<hv_magic_uvar_xkey>, which | |
765 | defines the interface. The call happens for hashes with "uvar" magic | |
766 | if the C<ufuncs> structure has equal values in the C<uf_val> and C<uf_set> | |
767 | fields. Hashes are unaffected if (and as long as) these fields | |
768 | hold different values. | |
769 | ||
770 | Upon the call, the C<mg_obj> field will hold the hash key to be accessed. | |
771 | Upon return, the C<SV*> value in C<mg_obj> will be used in place of the | |
772 | original key in the hash access. The integer index value in the first | |
773 | parameter will be the C<action> value from C<hv_fetch_common>, or -1 | |
774 | if the call is from C<hv_delete_common>. | |
775 | ||
776 | This is a template for a function suitable for the C<uf_val> field in | |
777 | a C<ufuncs> structure for this call. The C<uf_set> and C<uf_index> | |
778 | fields are irrelevant. | |
779 | ||
780 | IV watch_key(pTHX_ IV action, SV* field) { | |
781 | MAGIC* mg = mg_find(field, PERL_MAGIC_uvar); | |
782 | SV* keysv = mg->mg_obj; | |
783 | /* Do whatever you need to. If you decide to | |
784 | supply a different key newkey, return it like this | |
785 | */ | |
786 | sv_2mortal(newkey); | |
787 | mg->mg_obj = newkey; | |
788 | return 0; | |
789 | } | |
790 | ||
791 | =head2 Weakrefs call uvar magic | |
792 | ||
793 | When a weak reference is stored in an C<SV> that has "uvar" magic, C<set> | |
794 | magic is called after the reference has gone stale. This hook can be | |
795 | used to trigger further garbage-collection activities associated with | |
796 | the referenced object. | |
797 | ||
798 | =head2 How field hashes work | |
799 | ||
800 | The three features of key hashes, I<key replacement>, I<thread support>, | |
801 | and I<garbage collection> are supported by a data structure called | |
6ff38c27 AS |
802 | the I<object registry>. This is a private hash where every object |
803 | is stored. An "object" in this sense is any reference (blessed or | |
804 | unblessed) that has been used as a field hash key. | |
1e73acc8 AS |
805 | |
806 | The object registry keeps track of references that have been used as | |
807 | field hash keys. The keys are generated from the reference address | |
808 | like in a field hash (though the registry isn't a field hash). Each | |
809 | value is a weak copy of the original reference, stored in an C<SV> that | |
810 | is itself magical (C<PERL_MAGIC_uvar> again). The magical structure | |
811 | holds a list (another hash, really) of field hashes that the reference | |
812 | has been used with. When the weakref becomes stale, the magic is | |
813 | activated and uses the list to delete the reference from all field | |
814 | hashes it has been used with. After that, the entry is removed from | |
815 | the object registry itself. Implicitly, that frees the magic structure | |
816 | and the storage it has been using. | |
817 | ||
818 | Whenever a reference is used as a field hash key, the object registry | |
819 | is checked and a new entry is made if necessary. The field hash is | |
820 | then added to the list of fields this reference has used. | |
821 | ||
822 | The object registry is also used to repair a field hash after thread | |
823 | cloning. Here, the entire object registry is processed. For every | |
824 | reference found there, the field hashes it has used are visited and | |
825 | the entry is updated. | |
826 | ||
827 | =head2 Internal function Hash::Util::FieldHash::_fieldhash | |
828 | ||
829 | # test if %hash is a field hash | |
830 | my $result = _fieldhash \ %hash, 0; | |
831 | ||
832 | # make %hash a field hash | |
833 | my $result = _fieldhash \ %hash, 1; | |
834 | ||
835 | C<_fieldhash> is the internal function used to create field hashes. | |
836 | It takes two arguments, a hashref and a mode. If the mode is boolean | |
837 | false, the hash is not changed but tested if it is a field hash. If | |
838 | the hash isn't a field hash the return value is boolean false. If it | |
839 | is, the return value indicates the mode of field hash. When called with | |
840 | a boolean true mode, it turns the given hash into a field hash of this | |
841 | mode, returning the mode of the created field hash. C<_fieldhash> | |
842 | does not erase the given hash. | |
843 | ||
844 | Currently there is only one type of field hash, and only the boolean | |
845 | value of the mode makes a difference, but that may change. | |
846 | ||
847 | =head1 AUTHOR | |
848 | ||
3ae03d21 AS |
849 | Anno Siegel (ANNO) wrote the xs code and the changes in perl proper |
850 | Jerry Hedden (JDHEDDEN) made it faster | |
1e73acc8 AS |
851 | |
852 | =head1 COPYRIGHT AND LICENSE | |
853 | ||
b299c47c | 854 | Copyright (C) 2006-2007 by (Anno Siegel) |
1e73acc8 AS |
855 | |
856 | This library is free software; you can redistribute it and/or modify | |
857 | it under the same terms as Perl itself, either Perl version 5.8.7 or, | |
858 | at your option, any later version of Perl 5 you may have available. | |
859 | ||
860 | =cut |