Commit | Line | Data |
---|---|---|
a0d0e21e LW |
1 | package Search::Dict; |
2 | require 5.000; | |
3 | require Exporter; | |
4 | ||
0b0a7092 CBW |
5 | my $fc_available; |
6 | BEGIN { | |
7 | $fc_available = '5.015008'; | |
8 | if ( $] ge $fc_available ) { | |
9 | require feature; | |
10 | 'feature'->import('fc'); # string avoids warning on old Perls <sigh> | |
11 | } | |
12 | } | |
13 | ||
b75c8c73 MS |
14 | use strict; |
15 | ||
0b0a7092 | 16 | our $VERSION = '1.07'; |
b75c8c73 MS |
17 | our @ISA = qw(Exporter); |
18 | our @EXPORT = qw(look); | |
a0d0e21e | 19 | |
5be1dfc7 | 20 | =head1 NAME |
a0d0e21e | 21 | |
04101b8f | 22 | Search::Dict - look - search for key in dictionary file |
5be1dfc7 HF |
23 | |
24 | =head1 SYNOPSIS | |
25 | ||
26 | use Search::Dict; | |
6e372064 JH |
27 | look *FILEHANDLE, $key, $dict, $fold; |
28 | ||
29 | use Search::Dict; | |
30 | look *FILEHANDLE, $params; | |
5be1dfc7 HF |
31 | |
32 | =head1 DESCRIPTION | |
33 | ||
34 | Sets file position in FILEHANDLE to be first line greater than or equal | |
35 | (stringwise) to I<$key>. Returns the new file position, or -1 if an error | |
36 | occurs. | |
37 | ||
38 | The flags specify dictionary order and case folding: | |
39 | ||
40 | If I<$dict> is true, search by dictionary order (ignore anything but word | |
37ef5c3b | 41 | characters and whitespace). The default is honour all characters. |
5be1dfc7 | 42 | |
37ef5c3b JH |
43 | If I<$fold> is true, ignore case. The default is to honour case. |
44 | ||
37ef5c3b JH |
45 | If there are only three arguments and the third argument is a hash |
46 | reference, the keys of that hash can have values C<dict>, C<fold>, and | |
fb78fdcd | 47 | C<comp> or C<xfrm> (see below), and their corresponding values will be |
6e372064 JH |
48 | used as the parameters. |
49 | ||
50 | If a comparison subroutine (comp) is defined, it must return less than zero, | |
51 | zero, or greater than zero, if the first comparand is less than, | |
52 | equal, or greater than the second comparand. | |
53 | ||
54 | If a transformation subroutine (xfrm) is defined, its value is used to | |
55 | transform the lines read from the filehandle before their comparison. | |
5be1dfc7 HF |
56 | |
57 | =cut | |
a0d0e21e LW |
58 | |
59 | sub look { | |
6e372064 JH |
60 | my($fh,$key,$dict,$fold) = @_; |
61 | my ($comp, $xfrm); | |
37ef5c3b | 62 | if (@_ == 3 && ref $dict eq 'HASH') { |
6e372064 | 63 | my $params = $dict; |
37ef5c3b | 64 | $dict = 0; |
6e372064 JH |
65 | $dict = $params->{dict} if exists $params->{dict}; |
66 | $fold = $params->{fold} if exists $params->{fold}; | |
67 | $comp = $params->{comp} if exists $params->{comp}; | |
68 | $xfrm = $params->{xfrm} if exists $params->{xfrm}; | |
37ef5c3b JH |
69 | } |
70 | $comp = sub { $_[0] cmp $_[1] } unless defined $comp; | |
5be1dfc7 | 71 | local($_); |
0b0a7092 CBW |
72 | my $fno = fileno $fh; |
73 | my @stat; | |
74 | if ( defined $fno && $fno >= 0 && ! tied *{$fh} ) { # real, open file | |
75 | @stat = eval { stat($fh) }; # in case fileno lies | |
76 | } | |
5be1dfc7 | 77 | my($size, $blksize) = @stat[7,11]; |
0b0a7092 CBW |
78 | $size = do { seek($fh,0,2); my $s = tell($fh); seek($fh,0,0); $s } |
79 | unless defined $size; | |
5be1dfc7 | 80 | $blksize ||= 8192; |
a0d0e21e | 81 | $key =~ s/[^\w\s]//g if $dict; |
0b0a7092 CBW |
82 | if ( $fold ) { |
83 | $key = $] ge $fc_available ? fc($key) : lc($key); | |
84 | } | |
37ef5c3b JH |
85 | # find the right block |
86 | my($min, $max) = (0, int($size / $blksize)); | |
87 | my $mid; | |
a0d0e21e LW |
88 | while ($max - $min > 1) { |
89 | $mid = int(($max + $min) / 2); | |
b75c8c73 | 90 | seek($fh, $mid * $blksize, 0) |
5be1dfc7 | 91 | or return -1; |
b75c8c73 MS |
92 | <$fh> if $mid; # probably a partial line |
93 | $_ = <$fh>; | |
6e372064 | 94 | $_ = $xfrm->($_) if defined $xfrm; |
37ef5c3b | 95 | chomp; |
a0d0e21e | 96 | s/[^\w\s]//g if $dict; |
0b0a7092 CBW |
97 | if ( $fold ) { |
98 | $_ = $] ge $fc_available ? fc($_) : lc($_); | |
99 | } | |
37ef5c3b | 100 | if (defined($_) && $comp->($_, $key) < 0) { |
a0d0e21e LW |
101 | $min = $mid; |
102 | } | |
103 | else { | |
104 | $max = $mid; | |
105 | } | |
106 | } | |
37ef5c3b | 107 | # find the right line |
a0d0e21e | 108 | $min *= $blksize; |
b75c8c73 | 109 | seek($fh,$min,0) |
5be1dfc7 | 110 | or return -1; |
b75c8c73 | 111 | <$fh> if $min; |
5be1dfc7 | 112 | for (;;) { |
b75c8c73 MS |
113 | $min = tell($fh); |
114 | defined($_ = <$fh>) | |
5be1dfc7 | 115 | or last; |
6e372064 | 116 | $_ = $xfrm->($_) if defined $xfrm; |
37ef5c3b | 117 | chomp; |
a0d0e21e | 118 | s/[^\w\s]//g if $dict; |
0b0a7092 CBW |
119 | if ( $fold ) { |
120 | $_ = $] ge $fc_available ? fc($_) : lc($_); | |
121 | } | |
37ef5c3b | 122 | last if $comp->($_, $key) >= 0; |
a0d0e21e | 123 | } |
b75c8c73 | 124 | seek($fh,$min,0); |
a0d0e21e LW |
125 | $min; |
126 | } | |
127 | ||
128 | 1; |