Commit | Line | Data |
---|---|---|
a0d0e21e LW |
1 | package Search::Dict; |
2 | require 5.000; | |
3 | require Exporter; | |
4 | ||
b75c8c73 MS |
5 | use strict; |
6 | ||
8a365628 | 7 | our $VERSION = '1.03'; |
b75c8c73 MS |
8 | our @ISA = qw(Exporter); |
9 | our @EXPORT = qw(look); | |
a0d0e21e | 10 | |
5be1dfc7 | 11 | =head1 NAME |
a0d0e21e | 12 | |
5be1dfc7 HF |
13 | Search::Dict, look - search for key in dictionary file |
14 | ||
15 | =head1 SYNOPSIS | |
16 | ||
17 | use Search::Dict; | |
6e372064 JH |
18 | look *FILEHANDLE, $key, $dict, $fold; |
19 | ||
20 | use Search::Dict; | |
21 | look *FILEHANDLE, $params; | |
5be1dfc7 HF |
22 | |
23 | =head1 DESCRIPTION | |
24 | ||
25 | Sets file position in FILEHANDLE to be first line greater than or equal | |
26 | (stringwise) to I<$key>. Returns the new file position, or -1 if an error | |
27 | occurs. | |
28 | ||
29 | The flags specify dictionary order and case folding: | |
30 | ||
31 | If I<$dict> is true, search by dictionary order (ignore anything but word | |
37ef5c3b | 32 | characters and whitespace). The default is honour all characters. |
5be1dfc7 | 33 | |
37ef5c3b JH |
34 | If I<$fold> is true, ignore case. The default is to honour case. |
35 | ||
37ef5c3b JH |
36 | If there are only three arguments and the third argument is a hash |
37 | reference, the keys of that hash can have values C<dict>, C<fold>, and | |
fb78fdcd | 38 | C<comp> or C<xfrm> (see below), and their corresponding values will be |
6e372064 JH |
39 | used as the parameters. |
40 | ||
41 | If a comparison subroutine (comp) is defined, it must return less than zero, | |
42 | zero, or greater than zero, if the first comparand is less than, | |
43 | equal, or greater than the second comparand. | |
44 | ||
45 | If a transformation subroutine (xfrm) is defined, its value is used to | |
46 | transform the lines read from the filehandle before their comparison. | |
5be1dfc7 HF |
47 | |
48 | =cut | |
a0d0e21e LW |
49 | |
50 | sub look { | |
6e372064 JH |
51 | my($fh,$key,$dict,$fold) = @_; |
52 | my ($comp, $xfrm); | |
37ef5c3b | 53 | if (@_ == 3 && ref $dict eq 'HASH') { |
6e372064 | 54 | my $params = $dict; |
37ef5c3b | 55 | $dict = 0; |
6e372064 JH |
56 | $dict = $params->{dict} if exists $params->{dict}; |
57 | $fold = $params->{fold} if exists $params->{fold}; | |
58 | $comp = $params->{comp} if exists $params->{comp}; | |
59 | $xfrm = $params->{xfrm} if exists $params->{xfrm}; | |
37ef5c3b JH |
60 | } |
61 | $comp = sub { $_[0] cmp $_[1] } unless defined $comp; | |
5be1dfc7 | 62 | local($_); |
b75c8c73 | 63 | my(@stat) = stat($fh) |
5be1dfc7 HF |
64 | or return -1; |
65 | my($size, $blksize) = @stat[7,11]; | |
66 | $blksize ||= 8192; | |
a0d0e21e | 67 | $key =~ s/[^\w\s]//g if $dict; |
37ef5c3b JH |
68 | $key = lc $key if $fold; |
69 | # find the right block | |
70 | my($min, $max) = (0, int($size / $blksize)); | |
71 | my $mid; | |
a0d0e21e LW |
72 | while ($max - $min > 1) { |
73 | $mid = int(($max + $min) / 2); | |
b75c8c73 | 74 | seek($fh, $mid * $blksize, 0) |
5be1dfc7 | 75 | or return -1; |
b75c8c73 MS |
76 | <$fh> if $mid; # probably a partial line |
77 | $_ = <$fh>; | |
6e372064 | 78 | $_ = $xfrm->($_) if defined $xfrm; |
37ef5c3b | 79 | chomp; |
a0d0e21e | 80 | s/[^\w\s]//g if $dict; |
37ef5c3b JH |
81 | $_ = lc $_ if $fold; |
82 | if (defined($_) && $comp->($_, $key) < 0) { | |
a0d0e21e LW |
83 | $min = $mid; |
84 | } | |
85 | else { | |
86 | $max = $mid; | |
87 | } | |
88 | } | |
37ef5c3b | 89 | # find the right line |
a0d0e21e | 90 | $min *= $blksize; |
b75c8c73 | 91 | seek($fh,$min,0) |
5be1dfc7 | 92 | or return -1; |
b75c8c73 | 93 | <$fh> if $min; |
5be1dfc7 | 94 | for (;;) { |
b75c8c73 MS |
95 | $min = tell($fh); |
96 | defined($_ = <$fh>) | |
5be1dfc7 | 97 | or last; |
6e372064 | 98 | $_ = $xfrm->($_) if defined $xfrm; |
37ef5c3b | 99 | chomp; |
a0d0e21e | 100 | s/[^\w\s]//g if $dict; |
37ef5c3b JH |
101 | $_ = lc $_ if $fold; |
102 | last if $comp->($_, $key) >= 0; | |
a0d0e21e | 103 | } |
b75c8c73 | 104 | seek($fh,$min,0); |
a0d0e21e LW |
105 | $min; |
106 | } | |
107 | ||
108 | 1; |