| 1 | package Hash::Util::FieldHash; |
| 2 | |
| 3 | use 5.009004; |
| 4 | use strict; |
| 5 | use warnings; |
| 6 | use Scalar::Util qw( reftype); |
| 7 | |
| 8 | our $VERSION = '1.12'; |
| 9 | |
| 10 | require Exporter; |
| 11 | our @ISA = qw(Exporter); |
| 12 | our %EXPORT_TAGS = ( |
| 13 | 'all' => [ qw( |
| 14 | fieldhash |
| 15 | fieldhashes |
| 16 | idhash |
| 17 | idhashes |
| 18 | id |
| 19 | id_2obj |
| 20 | register |
| 21 | )], |
| 22 | ); |
| 23 | our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
| 24 | |
| 25 | { |
| 26 | require XSLoader; |
| 27 | my %ob_reg; # private object registry |
| 28 | sub _ob_reg { \ %ob_reg } |
| 29 | XSLoader::load(); |
| 30 | } |
| 31 | |
| 32 | sub fieldhash (\%) { |
| 33 | for ( shift ) { |
| 34 | return unless ref() && reftype( $_) eq 'HASH'; |
| 35 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0); |
| 36 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 2) == 2; |
| 37 | return; |
| 38 | } |
| 39 | } |
| 40 | |
| 41 | sub idhash (\%) { |
| 42 | for ( shift ) { |
| 43 | return unless ref() && reftype( $_) eq 'HASH'; |
| 44 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0); |
| 45 | return $_ if Hash::Util::FieldHash::_fieldhash( $_, 1) == 1; |
| 46 | return; |
| 47 | } |
| 48 | } |
| 49 | |
| 50 | sub fieldhashes { map &fieldhash( $_), @_ } |
| 51 | sub idhashes { map &idhash( $_), @_ } |
| 52 | |
| 53 | 1; |
| 54 | __END__ |
| 55 | |
| 56 | =head1 NAME |
| 57 | |
| 58 | Hash::Util::FieldHash - Support for Inside-Out Classes |
| 59 | |
| 60 | =head1 SYNOPSIS |
| 61 | |
| 62 | ### Create fieldhashes |
| 63 | use Hash::Util qw(fieldhash fieldhashes); |
| 64 | |
| 65 | # Create a single field hash |
| 66 | fieldhash my %foo; |
| 67 | |
| 68 | # Create three at once... |
| 69 | fieldhashes \ my(%foo, %bar, %baz); |
| 70 | # ...or any number |
| 71 | fieldhashes @hashrefs; |
| 72 | |
| 73 | ### Create an idhash and register it for garbage collection |
| 74 | use Hash::Util::FieldHash qw(idhash register); |
| 75 | idhash my %name; |
| 76 | my $object = \ do { my $o }; |
| 77 | # register the idhash for garbage collection with $object |
| 78 | register($object, \ %name); |
| 79 | # the following entry will be deleted when $object goes out of scope |
| 80 | $name{$object} = 'John Doe'; |
| 81 | |
| 82 | ### Register an ordinary hash for garbage collection |
| 83 | use Hash::Util::FieldHash qw(id register); |
| 84 | my %name; |
| 85 | my $object = \ do { my $o }; |
| 86 | # register the hash %name for garbage collection of $object's id |
| 87 | register $object, \ %name; |
| 88 | # the following entry will be deleted when $object goes out of scope |
| 89 | $name{id $object} = 'John Doe'; |
| 90 | |
| 91 | =head1 FUNCTIONS |
| 92 | |
| 93 | C<Hash::Util::FieldHash> offers a number of functions in support of |
| 94 | L<The Inside-out Technique> of class construction. |
| 95 | |
| 96 | =over |
| 97 | |
| 98 | =item id |
| 99 | |
| 100 | id($obj) |
| 101 | |
| 102 | Returns the reference address of a reference $obj. If $obj is |
| 103 | not a reference, returns $obj. |
| 104 | |
| 105 | This function is a stand-in replacement for |
| 106 | L<Scalar::Util::refaddr|Scalar::Util/refaddr EXPR>, that is, it returns |
| 107 | the reference address of its argument as a numeric value. The only |
| 108 | difference is that C<refaddr()> returns C<undef> when given a |
| 109 | non-reference while C<id()> returns its argument unchanged. |
| 110 | |
| 111 | C<id()> also uses a caching technique that makes it faster when |
| 112 | the id of an object is requested often, but slower if it is needed |
| 113 | only once or twice. |
| 114 | |
| 115 | =item id_2obj |
| 116 | |
| 117 | $obj = id_2obj($id) |
| 118 | |
| 119 | If C<$id> is the id of a registered object (see L</register>), returns |
| 120 | the object, otherwise an undefined value. For registered objects this |
| 121 | is the inverse function of C<id()>. |
| 122 | |
| 123 | =item register |
| 124 | |
| 125 | register($obj) |
| 126 | register($obj, @hashrefs) |
| 127 | |
| 128 | In the first form, registers an object to work with for the function |
| 129 | C<id_2obj()>. In the second form, it additionally marks the given |
| 130 | hashrefs down for garbage collection. This means that when the object |
| 131 | goes out of scope, any entries in the given hashes under the key of |
| 132 | C<id($obj)> will be deleted from the hashes. |
| 133 | |
| 134 | It is a fatal error to register a non-reference $obj. Any non-hashrefs |
| 135 | among the following arguments are silently ignored. |
| 136 | |
| 137 | It is I<not> an error to register the same object multiple times with |
| 138 | varying sets of hashrefs. Any hashrefs that are not registered yet |
| 139 | will be added, others ignored. |
| 140 | |
| 141 | Registry also implies thread support. When a new thread is created, |
| 142 | all references are replaced with new ones, including all objects. |
| 143 | If a hash uses the reference address of an object as a key, that |
| 144 | connection would be broken. With a registered object, its id will |
| 145 | be updated in all hashes registered with it. |
| 146 | |
| 147 | =item idhash |
| 148 | |
| 149 | idhash my %hash |
| 150 | |
| 151 | Makes an idhash from the argument, which must be a hash. |
| 152 | |
| 153 | An I<idhash> works like a normal hash, except that it stringifies a |
| 154 | I<reference used as a key> differently. A reference is stringified |
| 155 | as if the C<id()> function had been invoked on it, that is, its |
| 156 | reference address in decimal is used as the key. |
| 157 | |
| 158 | =item idhashes |
| 159 | |
| 160 | idhashes \ my(%hash, %gnash, %trash) |
| 161 | idhashes \ @hashrefs |
| 162 | |
| 163 | Creates many idhashes from its hashref arguments. Returns those |
| 164 | arguments that could be converted or their number in scalar context. |
| 165 | |
| 166 | =item fieldhash |
| 167 | |
| 168 | fieldhash %hash; |
| 169 | |
| 170 | Creates a single fieldhash. The argument must be a hash. Returns |
| 171 | a reference to the given hash if successful, otherwise nothing. |
| 172 | |
| 173 | A I<fieldhash> is, in short, an idhash with auto-registry. When an |
| 174 | object (or, indeed, any reference) is used as a fieldhash key, the |
| 175 | fieldhash is automatically registered for garbage collection with |
| 176 | the object, as if C<register $obj, \ %fieldhash> had been called. |
| 177 | |
| 178 | =item fieldhashes |
| 179 | |
| 180 | fieldhashes @hashrefs; |
| 181 | |
| 182 | Creates any number of field hashes. Arguments must be hash references. |
| 183 | Returns the converted hashrefs in list context, their number in scalar |
| 184 | context. |
| 185 | |
| 186 | =back |
| 187 | |
| 188 | =head1 DESCRIPTION |
| 189 | |
| 190 | A word on terminology: I shall use the term I<field> for a scalar |
| 191 | piece of data that a class associates with an object. Other terms that |
| 192 | have been used for this concept are "object variable", "(object) property", |
| 193 | "(object) attribute" and more. Especially "attribute" has some currency |
| 194 | among Perl programmer, but that clashes with the C<attributes> pragma. The |
| 195 | term "field" also has some currency in this sense and doesn't seem |
| 196 | to conflict with other Perl terminology. |
| 197 | |
| 198 | In Perl, an object is a blessed reference. The standard way of associating |
| 199 | data with an object is to store the data inside the object's body, that is, |
| 200 | the piece of data pointed to by the reference. |
| 201 | |
| 202 | In consequence, if two or more classes want to access an object they |
| 203 | I<must> agree on the type of reference and also on the organization of |
| 204 | data within the object body. Failure to agree on the type results in |
| 205 | immediate death when the wrong method tries to access an object. Failure |
| 206 | to agree on data organization may lead to one class trampling over the |
| 207 | data of another. |
| 208 | |
| 209 | This object model leads to a tight coupling between subclasses. |
| 210 | If one class wants to inherit from another (and both classes access |
| 211 | object data), the classes must agree about implementation details. |
| 212 | Inheritance can only be used among classes that are maintained together, |
| 213 | in a single source or not. |
| 214 | |
| 215 | In particular, it is not possible to write general-purpose classes |
| 216 | in this technique, classes that can advertise themselves as "Put me |
| 217 | on your @ISA list and use my methods". If the other class has different |
| 218 | ideas about how the object body is used, there is trouble. |
| 219 | |
| 220 | For reference C<Name_hash> in L</Example 1> shows the standard implementation of |
| 221 | a simple class C<Name> in the well-known hash based way. It also demonstrates |
| 222 | the predictable failure to construct a common subclass C<NamedFile> |
| 223 | of C<Name> and the class C<IO::File> (whose objects I<must> be globrefs). |
| 224 | |
| 225 | Thus, techniques are of interest that store object data I<not> in |
| 226 | the object body but some other place. |
| 227 | |
| 228 | =head2 The Inside-out Technique |
| 229 | |
| 230 | With I<inside-out> classes, each class declares a (typically lexical) |
| 231 | hash for each field it wants to use. The reference address of an |
| 232 | object is used as the hash key. By definition, the reference address |
| 233 | is unique to each object so this guarantees a place for each field that |
| 234 | is private to the class and unique to each object. See C<Name_id> |
| 235 | in L</Example 1> for a simple example. |
| 236 | |
| 237 | In comparison to the standard implementation where the object is a |
| 238 | hash and the fields correspond to hash keys, here the fields correspond |
| 239 | to hashes, and the object determines the hash key. Thus the hashes |
| 240 | appear to be turned I<inside out>. |
| 241 | |
| 242 | The body of an object is never examined by an inside-out class, only |
| 243 | its reference address is used. This allows for the body of an actual |
| 244 | object to be I<anything at all> while the object methods of the class |
| 245 | still work as designed. This is a key feature of inside-out classes. |
| 246 | |
| 247 | =head2 Problems of Inside-out |
| 248 | |
| 249 | Inside-out classes give us freedom of inheritance, but as usual there |
| 250 | is a price. |
| 251 | |
| 252 | Most obviously, there is the necessity of retrieving the reference |
| 253 | address of an object for each data access. It's a minor inconvenience, |
| 254 | but it does clutter the code. |
| 255 | |
| 256 | More important (and less obvious) is the necessity of garbage |
| 257 | collection. When a normal object dies, anything stored in the |
| 258 | object body is garbage-collected by perl. With inside-out objects, |
| 259 | Perl knows nothing about the data stored in field hashes by a class, |
| 260 | but these must be deleted when the object goes out of scope. Thus |
| 261 | the class must provide a C<DESTROY> method to take care of that. |
| 262 | |
| 263 | In the presence of multiple classes it can be non-trivial |
| 264 | to make sure that every relevant destructor is called for |
| 265 | every object. Perl calls the first one it finds on the |
| 266 | inheritance tree (if any) and that's it. |
| 267 | |
| 268 | A related issue is thread-safety. When a new thread is created, |
| 269 | the Perl interpreter is cloned, which implies that all reference |
| 270 | addresses in use will be replaced with new ones. Thus, if a class |
| 271 | tries to access a field of a cloned object its (cloned) data will |
| 272 | still be stored under the now invalid reference address of the |
| 273 | original in the parent thread. A general C<CLONE> method must |
| 274 | be provided to re-establish the association. |
| 275 | |
| 276 | =head2 Solutions |
| 277 | |
| 278 | C<Hash::Util::FieldHash> addresses these issues on several |
| 279 | levels. |
| 280 | |
| 281 | The C<id()> function is provided in addition to the |
| 282 | existing C<Scalar::Util::refaddr()>. Besides its short name |
| 283 | it can be a little faster under some circumstances (and a |
| 284 | bit slower under others). Benchmark if it matters. The |
| 285 | working of C<id()> also allows the use of the class name |
| 286 | as a I<generic object> as described L<further down|/"The Generic Object">. |
| 287 | |
| 288 | The C<id()> function is incorporated in I<id hashes> in the sense |
| 289 | that it is called automatically on every key that is used with |
| 290 | the hash. No explicit call is necessary. |
| 291 | |
| 292 | The problems of garbage collection and thread safety are both |
| 293 | addressed by the function C<register()>. It registers an object |
| 294 | together with any number of hashes. Registry means that when the |
| 295 | object dies, an entry in any of the hashes under the reference |
| 296 | address of this object will be deleted. This guarantees garbage |
| 297 | collection in these hashes. It also means that on thread |
| 298 | cloning the object's entries in registered hashes will be |
| 299 | replaced with updated entries whose key is the cloned object's |
| 300 | reference address. Thus the object-data association becomes |
| 301 | thread-safe. |
| 302 | |
| 303 | Object registry is best done when the object is initialized |
| 304 | for use with a class. That way, garbage collection and thread |
| 305 | safety are established for every object and every field that is |
| 306 | initialized. |
| 307 | |
| 308 | Finally, I<field hashes> incorporate all these functions in one |
| 309 | package. Besides automatically calling the C<id()> function |
| 310 | on every object used as a key, the object is registered with |
| 311 | the field hash on first use. Classes based on field hashes |
| 312 | are fully garbage-collected and thread safe without further |
| 313 | measures. |
| 314 | |
| 315 | =head2 More Problems |
| 316 | |
| 317 | Another problem that occurs with inside-out classes is serialization. |
| 318 | Since the object data is not in its usual place, standard routines |
| 319 | like C<Storable::freeze()>, C<Storable::thaw()> and |
| 320 | C<Data::Dumper::Dumper()> can't deal with it on their own. Both |
| 321 | C<Data::Dumper> and C<Storable> provide the necessary hooks to |
| 322 | make things work, but the functions or methods used by the hooks |
| 323 | must be provided by each inside-out class. |
| 324 | |
| 325 | A general solution to the serialization problem would require another |
| 326 | level of registry, one that associates I<classes> and fields. |
| 327 | So far, the functions of C<Hash::Util::FieldHash> are unaware of |
| 328 | any classes, which I consider a feature. Therefore C<Hash::Util::FieldHash> |
| 329 | doesn't address the serialization problems. |
| 330 | |
| 331 | =head2 The Generic Object |
| 332 | |
| 333 | Classes based on the C<id()> function (and hence classes based on |
| 334 | C<idhash()> and C<fieldhash()>) show a peculiar behavior in that |
| 335 | the class name can be used like an object. Specifically, methods |
| 336 | that set or read data associated with an object continue to work as |
| 337 | class methods, just as if the class name were an object, distinct from |
| 338 | all other objects, with its own data. This object may be called |
| 339 | the I<generic object> of the class. |
| 340 | |
| 341 | This works because field hashes respond to keys that are not references |
| 342 | like a normal hash would and use the string offered as the hash key. |
| 343 | Thus, if a method is called as a class method, the field hash is presented |
| 344 | with the class name instead of an object and blithely uses it as a key. |
| 345 | Since the keys of real objects are decimal numbers, there is no |
| 346 | conflict and the slot in the field hash can be used like any other. |
| 347 | The C<id()> function behaves correspondingly with respect to non-reference |
| 348 | arguments. |
| 349 | |
| 350 | Two possible uses (besides ignoring the property) come to mind. |
| 351 | A singleton class could be implemented this using the generic object. |
| 352 | If necessary, an C<init()> method could die or ignore calls with |
| 353 | actual objects (references), so only the generic object will ever exist. |
| 354 | |
| 355 | Another use of the generic object would be as a template. It is |
| 356 | a convenient place to store class-specific defaults for various |
| 357 | fields to be used in actual object initialization. |
| 358 | |
| 359 | Usually, the feature can be entirely ignored. Calling I<object |
| 360 | methods> as I<class methods> normally leads to an error and isn't used |
| 361 | routinely anywhere. It may be a problem that this error isn't |
| 362 | indicated by a class with a generic object. |
| 363 | |
| 364 | =head2 How to use Field Hashes |
| 365 | |
| 366 | Traditionally, the definition of an inside-out class contains a bare |
| 367 | block inside which a number of lexical hashes are declared and the |
| 368 | basic accessor methods defined, usually through C<Scalar::Util::refaddr>. |
| 369 | Further methods may be defined outside this block. There has to be |
| 370 | a DESTROY method and, for thread support, a CLONE method. |
| 371 | |
| 372 | When field hashes are used, the basic structure remains the same. |
| 373 | Each lexical hash will be made a field hash. The call to C<refaddr> |
| 374 | can be omitted from the accessor methods. DESTROY and CLONE methods |
| 375 | are not necessary. |
| 376 | |
| 377 | If you have an existing inside-out class, simply making all hashes |
| 378 | field hashes with no other change should make no difference. Through |
| 379 | the calls to C<refaddr> or equivalent, the field hashes never get to |
| 380 | see a reference and work like normal hashes. Your DESTROY (and |
| 381 | CLONE) methods are still needed. |
| 382 | |
| 383 | To make the field hashes kick in, it is easiest to redefine C<refaddr> |
| 384 | as |
| 385 | |
| 386 | sub refaddr { shift } |
| 387 | |
| 388 | instead of importing it from C<Scalar::Util>. It should now be possible |
| 389 | to disable DESTROY and CLONE. Note that while it isn't disabled, |
| 390 | DESTROY will be called before the garbage collection of field hashes, |
| 391 | so it will be invoked with a functional object and will continue to |
| 392 | function. |
| 393 | |
| 394 | It is not desirable to import the functions C<fieldhash> and/or |
| 395 | C<fieldhashes> into every class that is going to use them. They |
| 396 | are only used once to set up the class. When the class is up and running, |
| 397 | these functions serve no more purpose. |
| 398 | |
| 399 | If there are only a few field hashes to declare, it is simplest to |
| 400 | |
| 401 | use Hash::Util::FieldHash; |
| 402 | |
| 403 | early and call the functions qualified: |
| 404 | |
| 405 | Hash::Util::FieldHash::fieldhash my %foo; |
| 406 | |
| 407 | Otherwise, import the functions into a convenient package like |
| 408 | C<HUF> or, more general, C<Aux> |
| 409 | |
| 410 | { |
| 411 | package Aux; |
| 412 | use Hash::Util::FieldHash ':all'; |
| 413 | } |
| 414 | |
| 415 | and call |
| 416 | |
| 417 | Aux::fieldhash my %foo; |
| 418 | |
| 419 | as needed. |
| 420 | |
| 421 | =head2 Garbage-Collected Hashes |
| 422 | |
| 423 | Garbage collection in a field hash means that entries will "spontaneously" |
| 424 | disappear when the object that created them disappears. That must be |
| 425 | borne in mind, especially when looping over a field hash. If anything |
| 426 | you do inside the loop could cause an object to go out of scope, a |
| 427 | random key may be deleted from the hash you are looping over. That |
| 428 | can throw the loop iterator, so it's best to cache a consistent snapshot |
| 429 | of the keys and/or values and loop over that. You will still have to |
| 430 | check that a cached entry still exists when you get to it. |
| 431 | |
| 432 | Garbage collection can be confusing when keys are created in a field hash |
| 433 | from normal scalars as well as references. Once a reference is I<used> with |
| 434 | a field hash, the entry will be collected, even if it was later overwritten |
| 435 | with a plain scalar key (every positive integer is a candidate). This |
| 436 | is true even if the original entry was deleted in the meantime. In fact, |
| 437 | deletion from a field hash, and also a test for existence constitute |
| 438 | I<use> in this sense and create a liability to delete the entry when |
| 439 | the reference goes out of scope. If you happen to create an entry |
| 440 | with an identical key from a string or integer, that will be collected |
| 441 | instead. Thus, mixed use of references and plain scalars as field hash |
| 442 | keys is not entirely supported. |
| 443 | |
| 444 | =head1 EXAMPLES |
| 445 | |
| 446 | The examples show a very simple class that implements a I<name>, consisting |
| 447 | of a first and last name (no middle initial). The name class has four |
| 448 | methods: |
| 449 | |
| 450 | =over |
| 451 | |
| 452 | =item * C<init()> |
| 453 | |
| 454 | An object method that initializes the first and last name to its |
| 455 | two arguments. If called as a class method, C<init()> creates an |
| 456 | object in the given class and initializes that. |
| 457 | |
| 458 | =item * C<first()> |
| 459 | |
| 460 | Retrieve the first name |
| 461 | |
| 462 | =item * C<last()> |
| 463 | |
| 464 | Retrieve the last name |
| 465 | |
| 466 | =item * C<name()> |
| 467 | |
| 468 | Retrieve the full name, the first and last name joined by a blank. |
| 469 | |
| 470 | =back |
| 471 | |
| 472 | The examples show this class implemented with different levels of |
| 473 | support by C<Hash::Util::FieldHash>. All supported combinations |
| 474 | are shown. The difference between implementations is often quite |
| 475 | small. The implementations are: |
| 476 | |
| 477 | =over |
| 478 | |
| 479 | =item * C<Name_hash> |
| 480 | |
| 481 | A conventional (not inside-out) implementation where an object is |
| 482 | a hash that stores the field values, without support by |
| 483 | C<Hash::Util::FieldHash>. This implementation doesn't allow |
| 484 | arbitrary inheritance. |
| 485 | |
| 486 | =item * C<Name_id> |
| 487 | |
| 488 | Inside-out implementation based on the C<id()> function. It needs |
| 489 | a C<DESTROY> method. For thread support a C<CLONE> method (not shown) |
| 490 | would also be needed. Instead of C<Hash::Util::FieldHash::id()> the |
| 491 | function C<Scalar::Util::refaddr> could be used with very little |
| 492 | functional difference. This is the basic pattern of an inside-out |
| 493 | class. |
| 494 | |
| 495 | =item * C<Name_idhash> |
| 496 | |
| 497 | Idhash-based inside-out implementation. Like C<Name_id> it needs |
| 498 | a C<DESTROY> method and would need C<CLONE> for thread support. |
| 499 | |
| 500 | =item * C<Name_id_reg> |
| 501 | |
| 502 | Inside-out implementation based on the C<id()> function with explicit |
| 503 | object registry. No destructor is needed and objects are thread safe. |
| 504 | |
| 505 | =item * C<Name_idhash_reg> |
| 506 | |
| 507 | Idhash-based inside-out implementation with explicit object registry. |
| 508 | No destructor is needed and objects are thread safe. |
| 509 | |
| 510 | =item * C<Name_fieldhash> |
| 511 | |
| 512 | FieldHash-based inside-out implementation. Object registry happens |
| 513 | automatically. No destructor is needed and objects are thread safe. |
| 514 | |
| 515 | =back |
| 516 | |
| 517 | These examples are realized in the code below, which could be copied |
| 518 | to a file F<Example.pm>. |
| 519 | |
| 520 | =head2 Example 1 |
| 521 | |
| 522 | use strict; use warnings; |
| 523 | |
| 524 | { |
| 525 | package Name_hash; # standard implementation: the |
| 526 | # object is a hash |
| 527 | sub init { |
| 528 | my $obj = shift; |
| 529 | my ($first, $last) = @_; |
| 530 | # create an object if called as class method |
| 531 | $obj = bless {}, $obj unless ref $obj; |
| 532 | $obj->{ first} = $first; |
| 533 | $obj->{ last} = $last; |
| 534 | $obj; |
| 535 | } |
| 536 | |
| 537 | sub first { shift()->{ first} } |
| 538 | sub last { shift()->{ last} } |
| 539 | |
| 540 | sub name { |
| 541 | my $n = shift; |
| 542 | join ' ' => $n->first, $n->last; |
| 543 | } |
| 544 | |
| 545 | } |
| 546 | |
| 547 | { |
| 548 | package Name_id; |
| 549 | use Hash::Util::FieldHash qw(id); |
| 550 | |
| 551 | my (%first, %last); |
| 552 | |
| 553 | sub init { |
| 554 | my $obj = shift; |
| 555 | my ($first, $last) = @_; |
| 556 | # create an object if called as class method |
| 557 | $obj = bless \ my $o, $obj unless ref $obj; |
| 558 | $first{ id $obj} = $first; |
| 559 | $last{ id $obj} = $last; |
| 560 | $obj; |
| 561 | } |
| 562 | |
| 563 | sub first { $first{ id shift()} } |
| 564 | sub last { $last{ id shift()} } |
| 565 | |
| 566 | sub name { |
| 567 | my $n = shift; |
| 568 | join ' ' => $n->first, $n->last; |
| 569 | } |
| 570 | |
| 571 | sub DESTROY { |
| 572 | my $id = id shift; |
| 573 | delete $first{ $id}; |
| 574 | delete $last{ $id}; |
| 575 | } |
| 576 | |
| 577 | } |
| 578 | |
| 579 | { |
| 580 | package Name_idhash; |
| 581 | use Hash::Util::FieldHash; |
| 582 | |
| 583 | Hash::Util::FieldHash::idhashes( \ my (%first, %last) ); |
| 584 | |
| 585 | sub init { |
| 586 | my $obj = shift; |
| 587 | my ($first, $last) = @_; |
| 588 | # create an object if called as class method |
| 589 | $obj = bless \ my $o, $obj unless ref $obj; |
| 590 | $first{ $obj} = $first; |
| 591 | $last{ $obj} = $last; |
| 592 | $obj; |
| 593 | } |
| 594 | |
| 595 | sub first { $first{ shift()} } |
| 596 | sub last { $last{ shift()} } |
| 597 | |
| 598 | sub name { |
| 599 | my $n = shift; |
| 600 | join ' ' => $n->first, $n->last; |
| 601 | } |
| 602 | |
| 603 | sub DESTROY { |
| 604 | my $n = shift; |
| 605 | delete $first{ $n}; |
| 606 | delete $last{ $n}; |
| 607 | } |
| 608 | |
| 609 | } |
| 610 | |
| 611 | { |
| 612 | package Name_id_reg; |
| 613 | use Hash::Util::FieldHash qw(id register); |
| 614 | |
| 615 | my (%first, %last); |
| 616 | |
| 617 | sub init { |
| 618 | my $obj = shift; |
| 619 | my ($first, $last) = @_; |
| 620 | # create an object if called as class method |
| 621 | $obj = bless \ my $o, $obj unless ref $obj; |
| 622 | register( $obj, \ (%first, %last) ); |
| 623 | $first{ id $obj} = $first; |
| 624 | $last{ id $obj} = $last; |
| 625 | $obj; |
| 626 | } |
| 627 | |
| 628 | sub first { $first{ id shift()} } |
| 629 | sub last { $last{ id shift()} } |
| 630 | |
| 631 | sub name { |
| 632 | my $n = shift; |
| 633 | join ' ' => $n->first, $n->last; |
| 634 | } |
| 635 | } |
| 636 | |
| 637 | { |
| 638 | package Name_idhash_reg; |
| 639 | use Hash::Util::FieldHash qw(register); |
| 640 | |
| 641 | Hash::Util::FieldHash::idhashes \ my (%first, %last); |
| 642 | |
| 643 | sub init { |
| 644 | my $obj = shift; |
| 645 | my ($first, $last) = @_; |
| 646 | # create an object if called as class method |
| 647 | $obj = bless \ my $o, $obj unless ref $obj; |
| 648 | register( $obj, \ (%first, %last) ); |
| 649 | $first{ $obj} = $first; |
| 650 | $last{ $obj} = $last; |
| 651 | $obj; |
| 652 | } |
| 653 | |
| 654 | sub first { $first{ shift()} } |
| 655 | sub last { $last{ shift()} } |
| 656 | |
| 657 | sub name { |
| 658 | my $n = shift; |
| 659 | join ' ' => $n->first, $n->last; |
| 660 | } |
| 661 | } |
| 662 | |
| 663 | { |
| 664 | package Name_fieldhash; |
| 665 | use Hash::Util::FieldHash; |
| 666 | |
| 667 | Hash::Util::FieldHash::fieldhashes \ my (%first, %last); |
| 668 | |
| 669 | sub init { |
| 670 | my $obj = shift; |
| 671 | my ($first, $last) = @_; |
| 672 | # create an object if called as class method |
| 673 | $obj = bless \ my $o, $obj unless ref $obj; |
| 674 | $first{ $obj} = $first; |
| 675 | $last{ $obj} = $last; |
| 676 | $obj; |
| 677 | } |
| 678 | |
| 679 | sub first { $first{ shift()} } |
| 680 | sub last { $last{ shift()} } |
| 681 | |
| 682 | sub name { |
| 683 | my $n = shift; |
| 684 | join ' ' => $n->first, $n->last; |
| 685 | } |
| 686 | } |
| 687 | |
| 688 | 1; |
| 689 | |
| 690 | To exercise the various implementations the script L<below|/"Example 2"> can |
| 691 | be used. |
| 692 | |
| 693 | It sets up a class C<Name> that is a mirror of one of the implementation |
| 694 | classes C<Name_hash>, C<Name_id>, ..., C<Name_fieldhash>. That determines |
| 695 | which implementation is run. |
| 696 | |
| 697 | The script first verifies the function of the C<Name> class. |
| 698 | |
| 699 | In the second step, the free inheritability of the implementation |
| 700 | (or lack thereof) is demonstrated. For this purpose it constructs |
| 701 | a class called C<NamedFile> which is a common subclass of C<Name> and |
| 702 | the standard class C<IO::File>. This puts inheritability to the test |
| 703 | because objects of C<IO::File> I<must> be globrefs. Objects of C<NamedFile> |
| 704 | should behave like a file opened for reading and also support the C<name()> |
| 705 | method. This class juncture works with exception of the C<Name_hash> |
| 706 | implementation, where object initialization fails because of the |
| 707 | incompatibility of object bodies. |
| 708 | |
| 709 | =head2 Example 2 |
| 710 | |
| 711 | use strict; use warnings; $| = 1; |
| 712 | |
| 713 | use Example; |
| 714 | |
| 715 | { |
| 716 | package Name; |
| 717 | use base 'Name_id'; # define here which implementation to run |
| 718 | } |
| 719 | |
| 720 | |
| 721 | # Verify that the base package works |
| 722 | my $n = Name->init(qw(Albert Einstein)); |
| 723 | print $n->name, "\n"; |
| 724 | print "\n"; |
| 725 | |
| 726 | # Create a named file handle (See definition below) |
| 727 | my $nf = NamedFile->init(qw(/tmp/x Filomena File)); |
| 728 | # use as a file handle... |
| 729 | for ( 1 .. 3 ) { |
| 730 | my $l = <$nf>; |
| 731 | print "line $_: $l"; |
| 732 | } |
| 733 | # ...and as a Name object |
| 734 | print "...brought to you by ", $nf->name, "\n"; |
| 735 | exit; |
| 736 | |
| 737 | |
| 738 | # Definition of NamedFile |
| 739 | package NamedFile; |
| 740 | use base 'Name'; |
| 741 | use base 'IO::File'; |
| 742 | |
| 743 | sub init { |
| 744 | my $obj = shift; |
| 745 | my ($file, $first, $last) = @_; |
| 746 | $obj = $obj->IO::File::new() unless ref $obj; |
| 747 | $obj->open($file) or die "Can't read '$file': $!"; |
| 748 | $obj->Name::init($first, $last); |
| 749 | } |
| 750 | __END__ |
| 751 | |
| 752 | |
| 753 | =head1 GUTS |
| 754 | |
| 755 | To make C<Hash::Util::FieldHash> work, there were two changes to |
| 756 | F<perl> itself. C<PERL_MAGIC_uvar> was made available for hashes, |
| 757 | and weak references now call uvar C<get> magic after a weakref has been |
| 758 | cleared. The first feature is used to make field hashes intercept |
| 759 | their keys upon access. The second one triggers garbage collection. |
| 760 | |
| 761 | =head2 The C<PERL_MAGIC_uvar> interface for hashes |
| 762 | |
| 763 | C<PERL_MAGIC_uvar> I<get> magic is called from C<hv_fetch_common> and |
| 764 | C<hv_delete_common> through the function C<hv_magic_uvar_xkey>, which |
| 765 | defines the interface. The call happens for hashes with "uvar" magic |
| 766 | if the C<ufuncs> structure has equal values in the C<uf_val> and C<uf_set> |
| 767 | fields. Hashes are unaffected if (and as long as) these fields |
| 768 | hold different values. |
| 769 | |
| 770 | Upon the call, the C<mg_obj> field will hold the hash key to be accessed. |
| 771 | Upon return, the C<SV*> value in C<mg_obj> will be used in place of the |
| 772 | original key in the hash access. The integer index value in the first |
| 773 | parameter will be the C<action> value from C<hv_fetch_common>, or -1 |
| 774 | if the call is from C<hv_delete_common>. |
| 775 | |
| 776 | This is a template for a function suitable for the C<uf_val> field in |
| 777 | a C<ufuncs> structure for this call. The C<uf_set> and C<uf_index> |
| 778 | fields are irrelevant. |
| 779 | |
| 780 | IV watch_key(pTHX_ IV action, SV* field) { |
| 781 | MAGIC* mg = mg_find(field, PERL_MAGIC_uvar); |
| 782 | SV* keysv = mg->mg_obj; |
| 783 | /* Do whatever you need to. If you decide to |
| 784 | supply a different key newkey, return it like this |
| 785 | */ |
| 786 | sv_2mortal(newkey); |
| 787 | mg->mg_obj = newkey; |
| 788 | return 0; |
| 789 | } |
| 790 | |
| 791 | =head2 Weakrefs call uvar magic |
| 792 | |
| 793 | When a weak reference is stored in an C<SV> that has "uvar" magic, C<set> |
| 794 | magic is called after the reference has gone stale. This hook can be |
| 795 | used to trigger further garbage-collection activities associated with |
| 796 | the referenced object. |
| 797 | |
| 798 | =head2 How field hashes work |
| 799 | |
| 800 | The three features of key hashes, I<key replacement>, I<thread support>, |
| 801 | and I<garbage collection> are supported by a data structure called |
| 802 | the I<object registry>. This is a private hash where every object |
| 803 | is stored. An "object" in this sense is any reference (blessed or |
| 804 | unblessed) that has been used as a field hash key. |
| 805 | |
| 806 | The object registry keeps track of references that have been used as |
| 807 | field hash keys. The keys are generated from the reference address |
| 808 | like in a field hash (though the registry isn't a field hash). Each |
| 809 | value is a weak copy of the original reference, stored in an C<SV> that |
| 810 | is itself magical (C<PERL_MAGIC_uvar> again). The magical structure |
| 811 | holds a list (another hash, really) of field hashes that the reference |
| 812 | has been used with. When the weakref becomes stale, the magic is |
| 813 | activated and uses the list to delete the reference from all field |
| 814 | hashes it has been used with. After that, the entry is removed from |
| 815 | the object registry itself. Implicitly, that frees the magic structure |
| 816 | and the storage it has been using. |
| 817 | |
| 818 | Whenever a reference is used as a field hash key, the object registry |
| 819 | is checked and a new entry is made if necessary. The field hash is |
| 820 | then added to the list of fields this reference has used. |
| 821 | |
| 822 | The object registry is also used to repair a field hash after thread |
| 823 | cloning. Here, the entire object registry is processed. For every |
| 824 | reference found there, the field hashes it has used are visited and |
| 825 | the entry is updated. |
| 826 | |
| 827 | =head2 Internal function Hash::Util::FieldHash::_fieldhash |
| 828 | |
| 829 | # test if %hash is a field hash |
| 830 | my $result = _fieldhash \ %hash, 0; |
| 831 | |
| 832 | # make %hash a field hash |
| 833 | my $result = _fieldhash \ %hash, 1; |
| 834 | |
| 835 | C<_fieldhash> is the internal function used to create field hashes. |
| 836 | It takes two arguments, a hashref and a mode. If the mode is boolean |
| 837 | false, the hash is not changed but tested if it is a field hash. If |
| 838 | the hash isn't a field hash the return value is boolean false. If it |
| 839 | is, the return value indicates the mode of field hash. When called with |
| 840 | a boolean true mode, it turns the given hash into a field hash of this |
| 841 | mode, returning the mode of the created field hash. C<_fieldhash> |
| 842 | does not erase the given hash. |
| 843 | |
| 844 | Currently there is only one type of field hash, and only the boolean |
| 845 | value of the mode makes a difference, but that may change. |
| 846 | |
| 847 | =head1 AUTHOR |
| 848 | |
| 849 | Anno Siegel (ANNO) wrote the xs code and the changes in perl proper |
| 850 | Jerry Hedden (JDHEDDEN) made it faster |
| 851 | |
| 852 | =head1 COPYRIGHT AND LICENSE |
| 853 | |
| 854 | Copyright (C) 2006-2007 by (Anno Siegel) |
| 855 | |
| 856 | This library is free software; you can redistribute it and/or modify |
| 857 | it under the same terms as Perl itself, either Perl version 5.8.7 or, |
| 858 | at your option, any later version of Perl 5 you may have available. |
| 859 | |
| 860 | =cut |