Commit | Line | Data |
---|---|---|
1e73acc8 AS |
1 | package Hash::Util::FieldHash; |
2 | ||
3 | use 5.009004; | |
4 | use strict; | |
5 | use warnings; | |
1e73acc8 AS |
6 | use Scalar::Util qw( reftype); |
7 | ||
0b057af7 | 8 | our $VERSION = '1.18'; |
3ae03d21 | 9 | |
1e73acc8 AS |
10 | require Exporter; |
11 | our @ISA = qw(Exporter); | |
12 | our %EXPORT_TAGS = ( | |
13 | 'all' => [ qw( | |
14 | fieldhash | |
15 | fieldhashes | |
d74d639b AS |
16 | idhash |
17 | idhashes | |
18 | id | |
19 | id_2obj | |
20 | register | |
1e73acc8 AS |
21 | )], |
22 | ); | |
23 | our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); | |
1e73acc8 | 24 | |
1e73acc8 AS |
25 | { |
26 | require XSLoader; | |
6ff38c27 AS |
27 | my %ob_reg; # private object registry |
28 | sub _ob_reg { \ %ob_reg } | |
da4061d3 | 29 | XSLoader::load(); |
1e73acc8 AS |
30 | } |
31 | ||
32 | sub fieldhash (\%) { | |
33 | for ( shift ) { | |
34 | return unless ref() && reftype( $_) eq 'HASH'; | |
35 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0); | |
d74d639b AS |
36 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 2) == 2; |
37 | return; | |
38 | } | |
39 | } | |
40 | ||
41 | sub idhash (\%) { | |
42 | for ( shift ) { | |
43 | return unless ref() && reftype( $_) eq 'HASH'; | |
44 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0); | |
45 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 1) == 1; | |
1e73acc8 AS |
46 | return; |
47 | } | |
48 | } | |
49 | ||
50 | sub fieldhashes { map &fieldhash( $_), @_ } | |
d74d639b | 51 | sub idhashes { map &idhash( $_), @_ } |
1e73acc8 AS |
52 | |
53 | 1; | |
54 | __END__ | |
55 | ||
56 | =head1 NAME | |
57 | ||
d74d639b | 58 | Hash::Util::FieldHash - Support for Inside-Out Classes |
1e73acc8 | 59 | |
b299c47c | 60 | =head1 SYNOPSIS |
1e73acc8 | 61 | |
d74d639b | 62 | ### Create fieldhashes |
1e73acc8 | 63 | use Hash::Util qw(fieldhash fieldhashes); |
6ff38c27 | 64 | |
1e73acc8 AS |
65 | # Create a single field hash |
66 | fieldhash my %foo; | |
6ff38c27 | 67 | |
1e73acc8 AS |
68 | # Create three at once... |
69 | fieldhashes \ my(%foo, %bar, %baz); | |
70 | # ...or any number | |
71 | fieldhashes @hashrefs; | |
72 | ||
d74d639b | 73 | ### Create an idhash and register it for garbage collection |
ada3dff8 | 74 | use Hash::Util::FieldHash qw(idhash register); |
d74d639b | 75 | idhash my %name; |
ada3dff8 | 76 | my $object = \ do { my $o }; |
d74d639b | 77 | # register the idhash for garbage collection with $object |
ada3dff8 RGS |
78 | register($object, \ %name); |
79 | # the following entry will be deleted when $object goes out of scope | |
80 | $name{$object} = 'John Doe'; | |
b299c47c | 81 | |
d74d639b | 82 | ### Register an ordinary hash for garbage collection |
ada3dff8 | 83 | use Hash::Util::FieldHash qw(id register); |
d74d639b | 84 | my %name; |
ada3dff8 RGS |
85 | my $object = \ do { my $o }; |
86 | # register the hash %name for garbage collection of $object's id | |
87 | register $object, \ %name; | |
88 | # the following entry will be deleted when $object goes out of scope | |
89 | $name{id $object} = 'John Doe'; | |
d74d639b | 90 | |
b299c47c | 91 | =head1 FUNCTIONS |
1e73acc8 | 92 | |
d74d639b | 93 | C<Hash::Util::FieldHash> offers a number of functions in support of |
b299c47c | 94 | L<The Inside-out Technique> of class construction. |
1e73acc8 AS |
95 | |
96 | =over | |
97 | ||
d74d639b | 98 | =item id |
1e73acc8 | 99 | |
d74d639b | 100 | id($obj) |
1e73acc8 | 101 | |
d74d639b AS |
102 | Returns the reference address of a reference $obj. If $obj is |
103 | not a reference, returns $obj. | |
1e73acc8 | 104 | |
d74d639b | 105 | This function is a stand-in replacement for |
712d8896 | 106 | L<Scalar::Util::refaddr|Scalar::Util/refaddr>, |
b79fa73c | 107 | that is, it returns |
d74d639b AS |
108 | the reference address of its argument as a numeric value. The only |
109 | difference is that C<refaddr()> returns C<undef> when given a | |
110 | non-reference while C<id()> returns its argument unchanged. | |
1e73acc8 | 111 | |
d74d639b AS |
112 | C<id()> also uses a caching technique that makes it faster when |
113 | the id of an object is requested often, but slower if it is needed | |
114 | only once or twice. | |
1e73acc8 | 115 | |
d74d639b | 116 | =item id_2obj |
1e73acc8 | 117 | |
d74d639b | 118 | $obj = id_2obj($id) |
1e73acc8 | 119 | |
d74d639b AS |
120 | If C<$id> is the id of a registered object (see L</register>), returns |
121 | the object, otherwise an undefined value. For registered objects this | |
122 | is the inverse function of C<id()>. | |
1e73acc8 | 123 | |
d74d639b | 124 | =item register |
1e73acc8 | 125 | |
d74d639b AS |
126 | register($obj) |
127 | register($obj, @hashrefs) | |
1e73acc8 | 128 | |
d74d639b AS |
129 | In the first form, registers an object to work with for the function |
130 | C<id_2obj()>. In the second form, it additionally marks the given | |
131 | hashrefs down for garbage collection. This means that when the object | |
132 | goes out of scope, any entries in the given hashes under the key of | |
133 | C<id($obj)> will be deleted from the hashes. | |
1e73acc8 | 134 | |
d74d639b AS |
135 | It is a fatal error to register a non-reference $obj. Any non-hashrefs |
136 | among the following arguments are silently ignored. | |
1e73acc8 | 137 | |
d74d639b AS |
138 | It is I<not> an error to register the same object multiple times with |
139 | varying sets of hashrefs. Any hashrefs that are not registered yet | |
140 | will be added, others ignored. | |
1e73acc8 | 141 | |
d74d639b AS |
142 | Registry also implies thread support. When a new thread is created, |
143 | all references are replaced with new ones, including all objects. | |
144 | If a hash uses the reference address of an object as a key, that | |
145 | connection would be broken. With a registered object, its id will | |
146 | be updated in all hashes registered with it. | |
1e73acc8 | 147 | |
d74d639b | 148 | =item idhash |
1e73acc8 | 149 | |
d74d639b | 150 | idhash my %hash |
1e73acc8 | 151 | |
d74d639b | 152 | Makes an idhash from the argument, which must be a hash. |
1e73acc8 | 153 | |
d74d639b AS |
154 | An I<idhash> works like a normal hash, except that it stringifies a |
155 | I<reference used as a key> differently. A reference is stringified | |
156 | as if the C<id()> function had been invoked on it, that is, its | |
157 | reference address in decimal is used as the key. | |
1e73acc8 | 158 | |
d74d639b | 159 | =item idhashes |
1e73acc8 | 160 | |
ada3dff8 | 161 | idhashes \ my(%hash, %gnash, %trash) |
d74d639b | 162 | idhashes @hashrefs |
1e73acc8 | 163 | |
d74d639b AS |
164 | Creates many idhashes from its hashref arguments. Returns those |
165 | arguments that could be converted or their number in scalar context. | |
1e73acc8 | 166 | |
d74d639b | 167 | =item fieldhash |
1e73acc8 | 168 | |
d74d639b | 169 | fieldhash %hash; |
1e73acc8 | 170 | |
d74d639b AS |
171 | Creates a single fieldhash. The argument must be a hash. Returns |
172 | a reference to the given hash if successful, otherwise nothing. | |
1e73acc8 | 173 | |
d74d639b AS |
174 | A I<fieldhash> is, in short, an idhash with auto-registry. When an |
175 | object (or, indeed, any reference) is used as a fieldhash key, the | |
176 | fieldhash is automatically registered for garbage collection with | |
177 | the object, as if C<register $obj, \ %fieldhash> had been called. | |
1e73acc8 | 178 | |
d74d639b | 179 | =item fieldhashes |
1e73acc8 | 180 | |
d74d639b | 181 | fieldhashes @hashrefs; |
1e73acc8 | 182 | |
d74d639b AS |
183 | Creates any number of field hashes. Arguments must be hash references. |
184 | Returns the converted hashrefs in list context, their number in scalar | |
185 | context. | |
1e73acc8 AS |
186 | |
187 | =back | |
188 | ||
b299c47c | 189 | =head1 DESCRIPTION |
1e73acc8 | 190 | |
d74d639b AS |
191 | A word on terminology: I shall use the term I<field> for a scalar |
192 | piece of data that a class associates with an object. Other terms that | |
193 | have been used for this concept are "object variable", "(object) property", | |
ada3dff8 | 194 | "(object) attribute" and more. Especially "attribute" has some currency |
d74d639b AS |
195 | among Perl programmers, but that clashes with the C<attributes> pragma. The |
196 | term "field" also has some currency in this sense and doesn't seem | |
197 | to conflict with other Perl terminology. | |
198 | ||
199 | In Perl, an object is a blessed reference. The standard way of associating | |
b7ffd429 | 200 | data with an object is to store the data inside the object's body, that is, |
d74d639b AS |
201 | the piece of data pointed to by the reference. |
202 | ||
203 | In consequence, if two or more classes want to access an object they | |
ada3dff8 | 204 | I<must> agree on the type of reference and also on the organization of |
d74d639b AS |
205 | data within the object body. Failure to agree on the type results in |
206 | immediate death when the wrong method tries to access an object. Failure | |
207 | to agree on data organization may lead to one class trampling over the | |
208 | data of another. | |
209 | ||
210 | This object model leads to a tight coupling between subclasses. | |
211 | If one class wants to inherit from another (and both classes access | |
212 | object data), the classes must agree about implementation details. | |
213 | Inheritance can only be used among classes that are maintained together, | |
214 | in a single source or not. | |
215 | ||
216 | In particular, it is not possible to write general-purpose classes | |
217 | in this technique, classes that can advertise themselves as "Put me | |
218 | on your @ISA list and use my methods". If the other class has different | |
219 | ideas about how the object body is used, there is trouble. | |
220 | ||
69895b01 | 221 | For reference, C<Name_hash> in L</Example 1> shows the standard implementation of |
d74d639b AS |
222 | a simple class C<Name> in the well-known hash based way. It also demonstrates |
223 | the predictable failure to construct a common subclass C<NamedFile> | |
224 | of C<Name> and the class C<IO::File> (whose objects I<must> be globrefs). | |
225 | ||
226 | Thus, techniques are of interest that store object data I<not> in | |
227 | the object body but some other place. | |
228 | ||
229 | =head2 The Inside-out Technique | |
230 | ||
231 | With I<inside-out> classes, each class declares a (typically lexical) | |
232 | hash for each field it wants to use. The reference address of an | |
233 | object is used as the hash key. By definition, the reference address | |
234 | is unique to each object so this guarantees a place for each field that | |
69895b01 KW |
235 | is private to the class and unique to each object. See C<Name_id> |
236 | in L</Example 1> for a simple example. | |
d74d639b AS |
237 | |
238 | In comparison to the standard implementation where the object is a | |
239 | hash and the fields correspond to hash keys, here the fields correspond | |
240 | to hashes, and the object determines the hash key. Thus the hashes | |
241 | appear to be turned I<inside out>. | |
242 | ||
243 | The body of an object is never examined by an inside-out class, only | |
244 | its reference address is used. This allows for the body of an actual | |
245 | object to be I<anything at all> while the object methods of the class | |
246 | still work as designed. This is a key feature of inside-out classes. | |
247 | ||
248 | =head2 Problems of Inside-out | |
249 | ||
250 | Inside-out classes give us freedom of inheritance, but as usual there | |
251 | is a price. | |
252 | ||
253 | Most obviously, there is the necessity of retrieving the reference | |
254 | address of an object for each data access. It's a minor inconvenience, | |
255 | but it does clutter the code. | |
256 | ||
257 | More important (and less obvious) is the necessity of garbage | |
258 | collection. When a normal object dies, anything stored in the | |
259 | object body is garbage-collected by perl. With inside-out objects, | |
260 | Perl knows nothing about the data stored in field hashes by a class, | |
261 | but these must be deleted when the object goes out of scope. Thus | |
262 | the class must provide a C<DESTROY> method to take care of that. | |
263 | ||
264 | In the presence of multiple classes it can be non-trivial | |
265 | to make sure that every relevant destructor is called for | |
266 | every object. Perl calls the first one it finds on the | |
267 | inheritance tree (if any) and that's it. | |
268 | ||
269 | A related issue is thread-safety. When a new thread is created, | |
270 | the Perl interpreter is cloned, which implies that all reference | |
271 | addresses in use will be replaced with new ones. Thus, if a class | |
272 | tries to access a field of a cloned object its (cloned) data will | |
273 | still be stored under the now invalid reference address of the | |
274 | original in the parent thread. A general C<CLONE> method must | |
275 | be provided to re-establish the association. | |
276 | ||
277 | =head2 Solutions | |
278 | ||
279 | C<Hash::Util::FieldHash> addresses these issues on several | |
280 | levels. | |
281 | ||
282 | The C<id()> function is provided in addition to the | |
283 | existing C<Scalar::Util::refaddr()>. Besides its short name | |
284 | it can be a little faster under some circumstances (and a | |
285 | bit slower under others). Benchmark if it matters. The | |
286 | working of C<id()> also allows the use of the class name | |
b299c47c | 287 | as a I<generic object> as described L<further down|/"The Generic Object">. |
d74d639b AS |
288 | |
289 | The C<id()> function is incorporated in I<id hashes> in the sense | |
290 | that it is called automatically on every key that is used with | |
291 | the hash. No explicit call is necessary. | |
292 | ||
293 | The problems of garbage collection and thread safety are both | |
294 | addressed by the function C<register()>. It registers an object | |
295 | together with any number of hashes. Registry means that when the | |
296 | object dies, an entry in any of the hashes under the reference | |
297 | address of this object will be deleted. This guarantees garbage | |
298 | collection in these hashes. It also means that on thread | |
299 | cloning the object's entries in registered hashes will be | |
300 | replaced with updated entries whose key is the cloned object's | |
301 | reference address. Thus the object-data association becomes | |
302 | thread-safe. | |
303 | ||
304 | Object registry is best done when the object is initialized | |
305 | for use with a class. That way, garbage collection and thread | |
306 | safety are established for every object and every field that is | |
307 | initialized. | |
308 | ||
309 | Finally, I<field hashes> incorporate all these functions in one | |
310 | package. Besides automatically calling the C<id()> function | |
311 | on every object used as a key, the object is registered with | |
312 | the field hash on first use. Classes based on field hashes | |
313 | are fully garbage-collected and thread safe without further | |
314 | measures. | |
315 | ||
316 | =head2 More Problems | |
317 | ||
318 | Another problem that occurs with inside-out classes is serialization. | |
319 | Since the object data is not in its usual place, standard routines | |
320 | like C<Storable::freeze()>, C<Storable::thaw()> and | |
321 | C<Data::Dumper::Dumper()> can't deal with it on their own. Both | |
322 | C<Data::Dumper> and C<Storable> provide the necessary hooks to | |
323 | make things work, but the functions or methods used by the hooks | |
324 | must be provided by each inside-out class. | |
325 | ||
326 | A general solution to the serialization problem would require another | |
92a34c45 | 327 | level of registry, one that associates I<classes> and fields. |
b7ffd429 | 328 | So far, the functions of C<Hash::Util::FieldHash> are unaware of |
d74d639b AS |
329 | any classes, which I consider a feature. Therefore C<Hash::Util::FieldHash> |
330 | doesn't address the serialization problems. | |
331 | ||
332 | =head2 The Generic Object | |
333 | ||
334 | Classes based on the C<id()> function (and hence classes based on | |
335 | C<idhash()> and C<fieldhash()>) show a peculiar behavior in that | |
336 | the class name can be used like an object. Specifically, methods | |
337 | that set or read data associated with an object continue to work as | |
338 | class methods, just as if the class name were an object, distinct from | |
339 | all other objects, with its own data. This object may be called | |
340 | the I<generic object> of the class. | |
341 | ||
ada3dff8 | 342 | This works because field hashes respond to keys that are not references |
d74d639b AS |
343 | like a normal hash would and use the string offered as the hash key. |
344 | Thus, if a method is called as a class method, the field hash is presented | |
345 | with the class name instead of an object and blithely uses it as a key. | |
346 | Since the keys of real objects are decimal numbers, there is no | |
347 | conflict and the slot in the field hash can be used like any other. | |
348 | The C<id()> function behaves correspondingly with respect to non-reference | |
349 | arguments. | |
350 | ||
351 | Two possible uses (besides ignoring the property) come to mind. | |
352 | A singleton class could be implemented using the generic object. |
353 | If necessary, an C<init()> method could die or ignore calls with | |
354 | actual objects (references), so only the generic object will ever exist. | |
355 | ||
356 | Another use of the generic object would be as a template. It is | |
357 | a convenient place to store class-specific defaults for various | |
358 | fields to be used in actual object initialization. | |
359 | ||
360 | Usually, the feature can be entirely ignored. Calling I<object | |
361 | methods> as I<class methods> normally leads to an error and isn't used | |
362 | routinely anywhere. It may be a problem that this error isn't | |
363 | indicated by a class with a generic object. | |
364 | ||
365 | =head2 How to use Field Hashes | |
1e73acc8 AS |
366 | |
367 | Traditionally, the definition of an inside-out class contains a bare | |
368 | block inside which a number of lexical hashes are declared and the | |
369 | basic accessor methods defined, usually through C<Scalar::Util::refaddr>. | |
370 | Further methods may be defined outside this block. There has to be | |
371 | a DESTROY method and, for thread support, a CLONE method. | |
372 | ||
ada3dff8 | 373 | When field hashes are used, the basic structure remains the same. |
1e73acc8 AS |
374 | Each lexical hash will be made a field hash. The call to C<refaddr> |
375 | can be omitted from the accessor methods. DESTROY and CLONE methods | |
376 | are not necessary. | |
377 | ||
378 | If you have an existing inside-out class, simply making all hashes | |
379 | field hashes with no other change should make no difference. Through | |
380 | the calls to C<refaddr> or equivalent, the field hashes never get to | |
381 | see a reference and work like normal hashes. Your DESTROY (and | |
382 | CLONE) methods are still needed. | |
383 | ||
384 | To make the field hashes kick in, it is easiest to redefine C<refaddr> | |
385 | as | |
386 | ||
387 | sub refaddr { shift } | |
388 | ||
389 | instead of importing it from C<Scalar::Util>. It should now be possible | |
390 | to disable DESTROY and CLONE. Note that while it isn't disabled, | |
391 | DESTROY will be called before the garbage collection of field hashes, | |
6ff38c27 AS |
392 | so it will be invoked with a functional object and will continue to |
393 | function. | |
394 | ||
395 | It is not desirable to import the functions C<fieldhash> and/or | |
396 | C<fieldhashes> into every class that is going to use them. They | |
397 | are only used once to set up the class. When the class is up and running, | |
398 | these functions serve no more purpose. | |
1e73acc8 | 399 | |
1e73acc8 AS |
400 | If there are only a few field hashes to declare, it is simplest to |
401 | ||
402 | use Hash::Util::FieldHash; | |
403 | ||
404 | early and call the functions qualified: | |
405 | ||
406 | Hash::Util::FieldHash::fieldhash my %foo; | |
407 | ||
408 | Otherwise, import the functions into a convenient package like | |
d74d639b | 409 | C<HUF> or, more generally, C<Aux> |
1e73acc8 AS |
410 | |
411 | { | |
412 | package Aux; | |
413 | use Hash::Util::FieldHash ':all'; | |
414 | } | |
415 | ||
416 | and call | |
417 | ||
418 | Aux::fieldhash my %foo; | |
419 | ||
420 | as needed. | |
421 | ||
d74d639b AS |
422 | =head2 Garbage-Collected Hashes |
423 | ||
424 | Garbage collection in a field hash means that entries will "spontaneously" | |
425 | disappear when the object that created them disappears. That must be | |
426 | borne in mind, especially when looping over a field hash. If anything | |
427 | you do inside the loop could cause an object to go out of scope, a | |
428 | random key may be deleted from the hash you are looping over. That | |
429 | can throw the loop iterator, so it's best to cache a consistent snapshot | |
430 | of the keys and/or values and loop over that. You will still have to | |
431 | check that a cached entry still exists when you get to it. | |
432 | ||
433 | Garbage collection can be confusing when keys are created in a field hash | |
434 | from normal scalars as well as references. Once a reference is I<used> with | |
435 | a field hash, the entry will be collected, even if it was later overwritten | |
436 | with a plain scalar key (every positive integer is a candidate). This | |
437 | is true even if the original entry was deleted in the meantime. In fact, | |
438 | deletion from a field hash, and also a test for existence constitute | |
439 | I<use> in this sense and create a liability to delete the entry when | |
440 | the reference goes out of scope. If you happen to create an entry | |
441 | with an identical key from a string or integer, that will be collected | |
442 | instead. Thus, mixed use of references and plain scalars as field hash | |
443 | keys is not entirely supported. | |
444 | ||
b299c47c | 445 | =head1 EXAMPLES |
1e73acc8 | 446 | |
d74d639b AS |
447 | The examples show a very simple class that implements a I<name>, consisting |
448 | of a first and last name (no middle initial). The name class has four | |
449 | methods: | |
1e73acc8 | 450 | |
d74d639b | 451 | =over |
1e73acc8 | 452 | |
d74d639b | 453 | =item * C<init()> |
1e73acc8 | 454 | |
d74d639b AS |
455 | An object method that initializes the first and last name to its |
456 | two arguments. If called as a class method, C<init()> creates an | |
457 | object in the given class and initializes that. | |
1e73acc8 | 458 | |
d74d639b | 459 | =item * C<first()> |
1e73acc8 | 460 | |
d74d639b | 461 | Retrieve the first name |
1e73acc8 | 462 | |
d74d639b | 463 | =item * C<last()> |
1e73acc8 | 464 | |
d74d639b | 465 | Retrieve the last name |
1e73acc8 | 466 | |
d74d639b | 467 | =item * C<name()> |
1e73acc8 | 468 | |
d74d639b | 469 | Retrieve the full name, the first and last name joined by a blank. |
1e73acc8 | 470 | |
d74d639b | 471 | =back |
1e73acc8 | 472 | |
d74d639b AS |
473 | The examples show this class implemented with different levels of |
474 | support by C<Hash::Util::FieldHash>. All supported combinations | |
475 | are shown. The difference between implementations is often quite | |
476 | small. The implementations are: | |
1e73acc8 | 477 | |
d74d639b | 478 | =over |
1e73acc8 | 479 | |
d74d639b | 480 | =item * C<Name_hash> |
1e73acc8 | 481 | |
d74d639b AS |
482 | A conventional (not inside-out) implementation where an object is |
483 | a hash that stores the field values, without support by | |
484 | C<Hash::Util::FieldHash>. This implementation doesn't allow | |
485 | arbitrary inheritance. | |
486 | ||
487 | =item * C<Name_id> | |
488 | ||
489 | Inside-out implementation based on the C<id()> function. It needs | |
490 | a C<DESTROY> method. For thread support a C<CLONE> method (not shown) | |
491 | would also be needed. Instead of C<Hash::Util::FieldHash::id()> the | |
492 | function C<Scalar::Util::refaddr> could be used with very little | |
493 | functional difference. This is the basic pattern of an inside-out | |
494 | class. | |
495 | ||
496 | =item * C<Name_idhash> | |
497 | ||
69895b01 | 498 | Idhash-based inside-out implementation. Like C<Name_id> it needs |
d74d639b AS |
499 | a C<DESTROY> method and would need C<CLONE> for thread support. |
500 | ||
501 | =item * C<Name_id_reg> | |
502 | ||
503 | Inside-out implementation based on the C<id()> function with explicit | |
504 | object registry. No destructor is needed and objects are thread safe. | |
505 | ||
506 | =item * C<Name_idhash_reg> | |
507 | ||
508 | Idhash-based inside-out implementation with explicit object registry. | |
509 | No destructor is needed and objects are thread safe. | |
510 | ||
511 | =item * C<Name_fieldhash> | |
512 | ||
b7ffd429 | 513 | FieldHash-based inside-out implementation. Object registry happens |
d74d639b AS |
514 | automatically. No destructor is needed and objects are thread safe. |
515 | ||
516 | =back | |
517 | ||
518 | These examples are realized in the code below, which could be copied | |
519 | to a file F<Example.pm>. | |
520 | ||
521 | =head2 Example 1 | |
522 | ||
523 | use strict; use warnings; | |
524 | ||
525 | { | |
555bd962 BG |
526 | package Name_hash; # standard implementation: the |
527 | # object is a hash | |
d74d639b AS |
528 | sub init { |
529 | my $obj = shift; | |
ada3dff8 | 530 | my ($first, $last) = @_; |
d74d639b AS |
531 | # create an object if called as class method |
532 | $obj = bless {}, $obj unless ref $obj; | |
533 | $obj->{ first} = $first; | |
534 | $obj->{ last} = $last; | |
535 | $obj; | |
536 | } | |
537 | ||
538 | sub first { shift()->{ first} } | |
539 | sub last { shift()->{ last} } | |
540 | ||
541 | sub name { | |
542 | my $n = shift; | |
543 | join ' ' => $n->first, $n->last; | |
544 | } | |
6ff38c27 | 545 | |
1e73acc8 AS |
546 | } |
547 | ||
d74d639b AS |
548 | { |
549 | package Name_id; | |
ada3dff8 | 550 | use Hash::Util::FieldHash qw(id); |
d74d639b | 551 | |
ada3dff8 | 552 | my (%first, %last); |
d74d639b AS |
553 | |
554 | sub init { | |
555 | my $obj = shift; | |
ada3dff8 | 556 | my ($first, $last) = @_; |
d74d639b AS |
557 | # create an object if called as class method |
558 | $obj = bless \ my $o, $obj unless ref $obj; | |
559 | $first{ id $obj} = $first; | |
560 | $last{ id $obj} = $last; | |
561 | $obj; | |
562 | } | |
563 | ||
564 | sub first { $first{ id shift()} } | |
565 | sub last { $last{ id shift()} } | |
566 | ||
567 | sub name { | |
568 | my $n = shift; | |
569 | join ' ' => $n->first, $n->last; | |
570 | } | |
571 | ||
572 | sub DESTROY { | |
573 | my $id = id shift; | |
574 | delete $first{ $id}; | |
575 | delete $last{ $id}; | |
576 | } | |
1e73acc8 | 577 | |
1e73acc8 AS |
578 | } |
579 | ||
d74d639b AS |
580 | { |
581 | package Name_idhash; | |
582 | use Hash::Util::FieldHash; | |
583 | ||
ada3dff8 | 584 | Hash::Util::FieldHash::idhashes( \ my (%first, %last) ); |
d74d639b AS |
585 | |
586 | sub init { | |
587 | my $obj = shift; | |
ada3dff8 | 588 | my ($first, $last) = @_; |
d74d639b AS |
589 | # create an object if called as class method |
590 | $obj = bless \ my $o, $obj unless ref $obj; | |
591 | $first{ $obj} = $first; | |
592 | $last{ $obj} = $last; | |
593 | $obj; | |
594 | } | |
595 | ||
596 | sub first { $first{ shift()} } | |
597 | sub last { $last{ shift()} } | |
598 | ||
599 | sub name { | |
600 | my $n = shift; | |
601 | join ' ' => $n->first, $n->last; | |
602 | } | |
603 | ||
604 | sub DESTROY { | |
605 | my $n = shift; | |
606 | delete $first{ $n}; | |
607 | delete $last{ $n}; | |
608 | } | |
609 | ||
1e73acc8 AS |
610 | } |
611 | ||
d74d639b AS |
612 | { |
613 | package Name_id_reg; | |
ada3dff8 | 614 | use Hash::Util::FieldHash qw(id register); |
d74d639b | 615 | |
ada3dff8 | 616 | my (%first, %last); |
d74d639b AS |
617 | |
618 | sub init { | |
619 | my $obj = shift; | |
ada3dff8 | 620 | my ($first, $last) = @_; |
d74d639b AS |
621 | # create an object if called as class method |
622 | $obj = bless \ my $o, $obj unless ref $obj; | |
ada3dff8 | 623 | register( $obj, \ (%first, %last) ); |
d74d639b AS |
624 | $first{ id $obj} = $first; |
625 | $last{ id $obj} = $last; | |
626 | $obj; | |
627 | } | |
628 | ||
629 | sub first { $first{ id shift()} } | |
630 | sub last { $last{ id shift()} } | |
631 | ||
632 | sub name { | |
633 | my $n = shift; | |
634 | join ' ' => $n->first, $n->last; | |
635 | } | |
636 | } | |
637 | ||
638 | { | |
639 | package Name_idhash_reg; | |
ada3dff8 | 640 | use Hash::Util::FieldHash qw(register); |
d74d639b | 641 | |
ada3dff8 | 642 | Hash::Util::FieldHash::idhashes \ my (%first, %last); |
d74d639b AS |
643 | |
644 | sub init { | |
645 | my $obj = shift; | |
ada3dff8 | 646 | my ($first, $last) = @_; |
d74d639b AS |
647 | # create an object if called as class method |
648 | $obj = bless \ my $o, $obj unless ref $obj; | |
ada3dff8 | 649 | register( $obj, \ (%first, %last) ); |
d74d639b AS |
650 | $first{ $obj} = $first; |
651 | $last{ $obj} = $last; | |
652 | $obj; | |
653 | } | |
654 | ||
655 | sub first { $first{ shift()} } | |
656 | sub last { $last{ shift()} } | |
657 | ||
658 | sub name { | |
659 | my $n = shift; | |
660 | join ' ' => $n->first, $n->last; | |
661 | } | |
662 | } | |
663 | ||
664 | { | |
665 | package Name_fieldhash; | |
666 | use Hash::Util::FieldHash; | |
667 | ||
ada3dff8 | 668 | Hash::Util::FieldHash::fieldhashes \ my (%first, %last); |
d74d639b AS |
669 | |
670 | sub init { | |
671 | my $obj = shift; | |
ada3dff8 | 672 | my ($first, $last) = @_; |
d74d639b AS |
673 | # create an object if called as class method |
674 | $obj = bless \ my $o, $obj unless ref $obj; | |
675 | $first{ $obj} = $first; | |
676 | $last{ $obj} = $last; | |
677 | $obj; | |
678 | } | |
679 | ||
680 | sub first { $first{ shift()} } | |
681 | sub last { $last{ shift()} } | |
682 | ||
683 | sub name { | |
684 | my $n = shift; | |
685 | join ' ' => $n->first, $n->last; | |
686 | } | |
687 | } | |
688 | ||
689 | 1; | |
690 | ||
b299c47c | 691 | To exercise the various implementations the script L<below|/"Example 2"> can |
d74d639b AS |
692 | be used. |
693 | ||
694 | It sets up a class C<Name> that is a mirror of one of the implementation | |
695 | classes C<Name_hash>, C<Name_id>, ..., C<Name_fieldhash>. That determines | |
696 | which implementation is run. | |
697 | ||
698 | The script first verifies the function of the C<Name> class. | |
699 | ||
b7ffd429 | 700 | In the second step, the free inheritability of the implementation |
d74d639b AS |
701 | (or lack thereof) is demonstrated. For this purpose it constructs |
702 | a class called C<NamedFile> which is a common subclass of C<Name> and | |
703 | the standard class C<IO::File>. This puts inheritability to the test | |
704 | because objects of C<IO::File> I<must> be globrefs. Objects of C<NamedFile> | |
705 | should behave like a file opened for reading and also support the C<name()> | |
706 | method. This class juncture works with the exception of the C<Name_hash> |
707 | implementation, where object initialization fails because of the | |
708 | incompatibility of object bodies. | |
709 | ||
710 | =head2 Example 2 | |
711 | ||
712 | use strict; use warnings; $| = 1; | |
713 | ||
714 | use Example; | |
715 | ||
716 | { | |
717 | package Name; | |
9f21ab77 | 718 | use parent -norequire, 'Name_id'; # define here which implementation to run |
d74d639b AS |
719 | } |
720 | ||
721 | ||
722 | # Verify that the base package works | |
ada3dff8 | 723 | my $n = Name->init(qw(Albert Einstein)); |
d74d639b AS |
724 | print $n->name, "\n"; |
725 | print "\n"; | |
726 | ||
727 | # Create a named file handle (See definition below) | |
ada3dff8 | 728 | my $nf = NamedFile->init(qw(/tmp/x Filomena File)); |
d74d639b AS |
729 | # use as a file handle... |
730 | for ( 1 .. 3 ) { | |
731 | my $l = <$nf>; | |
732 | print "line $_: $l"; | |
733 | } | |
734 | # ...and as a Name object | |
735 | print "...brought to you by ", $nf->name, "\n"; | |
736 | exit; | |
737 | ||
738 | ||
739 | # Definition of NamedFile | |
740 | package NamedFile; | |
9f21ab77 RS |
741 | use parent -norequire, 'Name'; | |
742 | use parent 'IO::File'; | |
d74d639b AS |
743 | |
744 | sub init { | |
745 | my $obj = shift; | |
ada3dff8 | 746 | my ($file, $first, $last) = @_; |
d74d639b | 747 | $obj = $obj->IO::File::new() unless ref $obj; |
ada3dff8 RGS |
748 | $obj->open($file) or die "Can't read '$file': $!"; |
749 | $obj->Name::init($first, $last); | |
d74d639b AS |
750 | } |
751 | __END__ | |
1e73acc8 | 752 | |
1e73acc8 | 753 | |
b299c47c | 754 | =head1 GUTS |
1e73acc8 AS |
755 | |
756 | To make C<Hash::Util::FieldHash> work, there were two changes to | |
b7b1e41b | 757 | F<perl> itself. C<PERL_MAGIC_uvar> was made available for hashes, |
1e73acc8 AS |
758 | and weak references now call uvar C<set> magic after a weakref has been | |
759 | cleared. The first feature is used to make field hashes intercept | |
760 | their keys upon access. The second one triggers garbage collection. | |
761 | ||
762 | =head2 The C<PERL_MAGIC_uvar> interface for hashes | |
763 | ||
764 | C<PERL_MAGIC_uvar> I<get> magic is called from C<hv_fetch_common> and | |
765 | C<hv_delete_common> through the function C<hv_magic_uvar_xkey>, which | |
766 | defines the interface. The call happens for hashes with "uvar" magic | |
767 | if the C<ufuncs> structure has equal values in the C<uf_val> and C<uf_set> | |
768 | fields. Hashes are unaffected if (and as long as) these fields | |
769 | hold different values. | |
770 | ||
771 | Upon the call, the C<mg_obj> field will hold the hash key to be accessed. | |
772 | Upon return, the C<SV*> value in C<mg_obj> will be used in place of the | |
773 | original key in the hash access. The integer index value in the first | |
774 | parameter will be the C<action> value from C<hv_fetch_common>, or -1 | |
775 | if the call is from C<hv_delete_common>. | |
776 | ||
777 | This is a template for a function suitable for the C<uf_val> field in | |
778 | a C<ufuncs> structure for this call. The C<uf_set> and C<uf_index> | |
779 | fields are irrelevant. | |
780 | ||
781 | IV watch_key(pTHX_ IV action, SV* field) { | |
782 | MAGIC* mg = mg_find(field, PERL_MAGIC_uvar); | |
783 | SV* keysv = mg->mg_obj; | |
784 | /* Do whatever you need to. If you decide to | |
785 | supply a different key newkey, return it like this | |
786 | */ | |
787 | sv_2mortal(newkey); | |
788 | mg->mg_obj = newkey; | |
789 | return 0; | |
790 | } | |
791 | ||
792 | =head2 Weakrefs call uvar magic | |
793 | ||
794 | When a weak reference is stored in an C<SV> that has "uvar" magic, C<set> | |
795 | magic is called after the reference has gone stale. This hook can be | |
796 | used to trigger further garbage-collection activities associated with | |
797 | the referenced object. | |
798 | ||
799 | =head2 How field hashes work | |
800 | ||
801 | The three features of field hashes, I<key replacement>, I<thread support>, | |
802 | and I<garbage collection> are supported by a data structure called | |
6ff38c27 AS |
803 | the I<object registry>. This is a private hash where every object |
804 | is stored. An "object" in this sense is any reference (blessed or | |
805 | unblessed) that has been used as a field hash key. | |
1e73acc8 AS |
806 | |
807 | The object registry keeps track of references that have been used as | |
808 | field hash keys. The keys are generated from the reference address | |
809 | like in a field hash (though the registry isn't a field hash). Each | |
810 | value is a weak copy of the original reference, stored in an C<SV> that | |
811 | is itself magical (C<PERL_MAGIC_uvar> again). The magical structure | |
812 | holds a list (another hash, really) of field hashes that the reference | |
813 | has been used with. When the weakref becomes stale, the magic is | |
814 | activated and uses the list to delete the reference from all field | |
815 | hashes it has been used with. After that, the entry is removed from | |
816 | the object registry itself. Implicitly, that frees the magic structure | |
817 | and the storage it has been using. | |
818 | ||
819 | Whenever a reference is used as a field hash key, the object registry | |
820 | is checked and a new entry is made if necessary. The field hash is | |
821 | then added to the list of fields this reference has used. | |
822 | ||
823 | The object registry is also used to repair a field hash after thread | |
824 | cloning. Here, the entire object registry is processed. For every | |
825 | reference found there, the field hashes it has used are visited and | |
826 | the entry is updated. | |
827 | ||
828 | =head2 Internal function Hash::Util::FieldHash::_fieldhash | |
829 | ||
830 | # test if %hash is a field hash | |
831 | my $result = _fieldhash \ %hash, 0; | |
832 | ||
833 | # make %hash a field hash (mode 2; mode 1 would make it an id hash) | |
834 | my $result = _fieldhash \ %hash, 2; | |
835 | ||
836 | C<_fieldhash> is the internal function used to create field hashes. | |
837 | It takes two arguments, a hashref and a mode. If the mode is boolean | |
838 | false, the hash is not changed but tested if it is a field hash. If | |
839 | the hash isn't a field hash the return value is boolean false. If it | |
840 | is, the return value indicates the mode of field hash. When called with | |
841 | a boolean true mode, it turns the given hash into a field hash of this | |
842 | mode, returning the mode of the created field hash. C<_fieldhash> | |
843 | does not erase the given hash. | |
844 | ||
845 | Currently two modes are in use: mode 1 creates an id hash (as C<idhash> | |
846 | does) and mode 2 creates a full field hash (as C<fieldhash> does). | |
847 | ||
848 | =head1 AUTHOR | |
849 | ||
3ae03d21 AS |
850 | Anno Siegel (ANNO) wrote the xs code and the changes in perl proper. | |
851 | Jerry Hedden (JDHEDDEN) made it faster. | |
1e73acc8 AS |
852 | |
853 | =head1 COPYRIGHT AND LICENSE | |
854 | ||
b299c47c | 855 | Copyright (C) 2006-2007 by (Anno Siegel) |
1e73acc8 AS |
856 | |
857 | This library is free software; you can redistribute it and/or modify | |
858 | it under the same terms as Perl itself, either Perl version 5.8.7 or, | |
859 | at your option, any later version of Perl 5 you may have available. | |
860 | ||
861 | =cut |