| 1 | package Search::Dict; |
| 2 | require 5.000; |
| 3 | require Exporter; |
| 4 | |
| 5 | use strict; |
| 6 | |
| 7 | our $VERSION = '1.03'; |
| 8 | our @ISA = qw(Exporter); |
| 9 | our @EXPORT = qw(look); |
| 10 | |
| 11 | =head1 NAME |
| 12 | |
| 13 | Search::Dict, look - search for key in dictionary file |
| 14 | |
| 15 | =head1 SYNOPSIS |
| 16 | |
| 17 | use Search::Dict; |
| 18 | look *FILEHANDLE, $key, $dict, $fold; |
| 19 | |
| 20 | use Search::Dict; |
| 21 | look *FILEHANDLE, $params; |
| 22 | |
| 23 | =head1 DESCRIPTION |
| 24 | |
| 25 | Sets file position in FILEHANDLE to be first line greater than or equal |
| 26 | (stringwise) to I<$key>. Returns the new file position, or -1 if an error |
| 27 | occurs. |
| 28 | |
| 29 | The flags specify dictionary order and case folding: |
| 30 | |
| 31 | If I<$dict> is true, search by dictionary order (ignore anything but word |
| 32 | characters and whitespace). The default is honour all characters. |
| 33 | |
| 34 | If I<$fold> is true, ignore case. The default is to honour case. |
| 35 | |
| 36 | If there are only three arguments and the third argument is a hash |
| 37 | reference, the keys of that hash can have values C<dict>, C<fold>, and |
| 38 | C<comp> or C<xfrm> (see below), and their corresponding values will be |
| 39 | used as the parameters. |
| 40 | |
| 41 | If a comparison subroutine (comp) is defined, it must return less than zero, |
| 42 | zero, or greater than zero, if the first comparand is less than, |
| 43 | equal, or greater than the second comparand. |
| 44 | |
| 45 | If a transformation subroutine (xfrm) is defined, its value is used to |
| 46 | transform the lines read from the filehandle before their comparison. |
| 47 | |
| 48 | =cut |
| 49 | |
| 50 | sub look { |
| 51 | my($fh,$key,$dict,$fold) = @_; |
| 52 | my ($comp, $xfrm); |
| 53 | if (@_ == 3 && ref $dict eq 'HASH') { |
| 54 | my $params = $dict; |
| 55 | $dict = 0; |
| 56 | $dict = $params->{dict} if exists $params->{dict}; |
| 57 | $fold = $params->{fold} if exists $params->{fold}; |
| 58 | $comp = $params->{comp} if exists $params->{comp}; |
| 59 | $xfrm = $params->{xfrm} if exists $params->{xfrm}; |
| 60 | } |
| 61 | $comp = sub { $_[0] cmp $_[1] } unless defined $comp; |
| 62 | local($_); |
| 63 | my(@stat) = stat($fh) |
| 64 | or return -1; |
| 65 | my($size, $blksize) = @stat[7,11]; |
| 66 | $blksize ||= 8192; |
| 67 | $key =~ s/[^\w\s]//g if $dict; |
| 68 | $key = lc $key if $fold; |
| 69 | # find the right block |
| 70 | my($min, $max) = (0, int($size / $blksize)); |
| 71 | my $mid; |
| 72 | while ($max - $min > 1) { |
| 73 | $mid = int(($max + $min) / 2); |
| 74 | seek($fh, $mid * $blksize, 0) |
| 75 | or return -1; |
| 76 | <$fh> if $mid; # probably a partial line |
| 77 | $_ = <$fh>; |
| 78 | $_ = $xfrm->($_) if defined $xfrm; |
| 79 | chomp; |
| 80 | s/[^\w\s]//g if $dict; |
| 81 | $_ = lc $_ if $fold; |
| 82 | if (defined($_) && $comp->($_, $key) < 0) { |
| 83 | $min = $mid; |
| 84 | } |
| 85 | else { |
| 86 | $max = $mid; |
| 87 | } |
| 88 | } |
| 89 | # find the right line |
| 90 | $min *= $blksize; |
| 91 | seek($fh,$min,0) |
| 92 | or return -1; |
| 93 | <$fh> if $min; |
| 94 | for (;;) { |
| 95 | $min = tell($fh); |
| 96 | defined($_ = <$fh>) |
| 97 | or last; |
| 98 | $_ = $xfrm->($_) if defined $xfrm; |
| 99 | chomp; |
| 100 | s/[^\w\s]//g if $dict; |
| 101 | $_ = lc $_ if $fold; |
| 102 | last if $comp->($_, $key) >= 0; |
| 103 | } |
| 104 | seek($fh,$min,0); |
| 105 | $min; |
| 106 | } |
| 107 | |
| 108 | 1; |