This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
new perldelta
[perl5.git] / dist / Search-Dict / lib / Search / Dict.pm
CommitLineData
a0d0e21e
LW
1package Search::Dict;
2require 5.000;
3require Exporter;
4
0b0a7092
CBW
5my $fc_available;
6BEGIN {
7 $fc_available = '5.015008';
8 if ( $] ge $fc_available ) {
9 require feature;
10 'feature'->import('fc'); # string avoids warning on old Perls <sigh>
11 }
12}
13
b75c8c73
MS
14use strict;
15
0b0a7092 16our $VERSION = '1.07';
b75c8c73
MS
17our @ISA = qw(Exporter);
18our @EXPORT = qw(look);
a0d0e21e 19
5be1dfc7 20=head1 NAME
a0d0e21e 21
04101b8f 22Search::Dict - look - search for key in dictionary file
5be1dfc7
HF
23
24=head1 SYNOPSIS
25
26 use Search::Dict;
6e372064
JH
27 look *FILEHANDLE, $key, $dict, $fold;
28
29 use Search::Dict;
30 look *FILEHANDLE, $params;
5be1dfc7
HF
31
32=head1 DESCRIPTION
33
34Sets file position in FILEHANDLE to be first line greater than or equal
35(stringwise) to I<$key>. Returns the new file position, or -1 if an error
36occurs.
37
38The flags specify dictionary order and case folding:
39
40If I<$dict> is true, search by dictionary order (ignore anything but word
37ef5c3b 41characters and whitespace). The default is honour all characters.
5be1dfc7 42
37ef5c3b
JH
43If I<$fold> is true, ignore case. The default is to honour case.
44
37ef5c3b
JH
45If there are only three arguments and the third argument is a hash
46reference, the keys of that hash can have values C<dict>, C<fold>, and
fb78fdcd 47C<comp> or C<xfrm> (see below), and their corresponding values will be
6e372064
JH
48used as the parameters.
49
50If a comparison subroutine (comp) is defined, it must return less than zero,
51zero, or greater than zero, if the first comparand is less than,
52equal, or greater than the second comparand.
53
54If a transformation subroutine (xfrm) is defined, its value is used to
55transform the lines read from the filehandle before their comparison.
5be1dfc7
HF
56
57=cut
a0d0e21e
LW
58
59sub look {
6e372064
JH
60 my($fh,$key,$dict,$fold) = @_;
61 my ($comp, $xfrm);
37ef5c3b 62 if (@_ == 3 && ref $dict eq 'HASH') {
6e372064 63 my $params = $dict;
37ef5c3b 64 $dict = 0;
6e372064
JH
65 $dict = $params->{dict} if exists $params->{dict};
66 $fold = $params->{fold} if exists $params->{fold};
67 $comp = $params->{comp} if exists $params->{comp};
68 $xfrm = $params->{xfrm} if exists $params->{xfrm};
37ef5c3b
JH
69 }
70 $comp = sub { $_[0] cmp $_[1] } unless defined $comp;
5be1dfc7 71 local($_);
0b0a7092
CBW
72 my $fno = fileno $fh;
73 my @stat;
74 if ( defined $fno && $fno >= 0 && ! tied *{$fh} ) { # real, open file
75 @stat = eval { stat($fh) }; # in case fileno lies
76 }
5be1dfc7 77 my($size, $blksize) = @stat[7,11];
0b0a7092
CBW
78 $size = do { seek($fh,0,2); my $s = tell($fh); seek($fh,0,0); $s }
79 unless defined $size;
5be1dfc7 80 $blksize ||= 8192;
a0d0e21e 81 $key =~ s/[^\w\s]//g if $dict;
0b0a7092
CBW
82 if ( $fold ) {
83 $key = $] ge $fc_available ? fc($key) : lc($key);
84 }
37ef5c3b
JH
85 # find the right block
86 my($min, $max) = (0, int($size / $blksize));
87 my $mid;
a0d0e21e
LW
88 while ($max - $min > 1) {
89 $mid = int(($max + $min) / 2);
b75c8c73 90 seek($fh, $mid * $blksize, 0)
5be1dfc7 91 or return -1;
b75c8c73
MS
92 <$fh> if $mid; # probably a partial line
93 $_ = <$fh>;
6e372064 94 $_ = $xfrm->($_) if defined $xfrm;
37ef5c3b 95 chomp;
a0d0e21e 96 s/[^\w\s]//g if $dict;
0b0a7092
CBW
97 if ( $fold ) {
98 $_ = $] ge $fc_available ? fc($_) : lc($_);
99 }
37ef5c3b 100 if (defined($_) && $comp->($_, $key) < 0) {
a0d0e21e
LW
101 $min = $mid;
102 }
103 else {
104 $max = $mid;
105 }
106 }
37ef5c3b 107 # find the right line
a0d0e21e 108 $min *= $blksize;
b75c8c73 109 seek($fh,$min,0)
5be1dfc7 110 or return -1;
b75c8c73 111 <$fh> if $min;
5be1dfc7 112 for (;;) {
b75c8c73
MS
113 $min = tell($fh);
114 defined($_ = <$fh>)
5be1dfc7 115 or last;
6e372064 116 $_ = $xfrm->($_) if defined $xfrm;
37ef5c3b 117 chomp;
a0d0e21e 118 s/[^\w\s]//g if $dict;
0b0a7092
CBW
119 if ( $fold ) {
120 $_ = $] ge $fc_available ? fc($_) : lc($_);
121 }
37ef5c3b 122 last if $comp->($_, $key) >= 0;
a0d0e21e 123 }
b75c8c73 124 seek($fh,$min,0);
a0d0e21e
LW
125 $min;
126}
127
1281;