This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Build the nonxs extensions before running mktables.
[perl5.git] / win32 / bin / search.pl
CommitLineData
d444a431
TB
1#!/usr/local/bin/perl -w
2'di';
3'ig00';
4##############################################################################
5##
6## search
7##
8## Jeffrey Friedl (jfriedl@omron.co.jp), Dec 1994.
9## Copyright 19.... ah hell, just take it.
10##
11## BLURB:
12## A combo of find and grep -- more or less do a 'grep' on a whole
13## directory tree. Fast, with lots of options. Much more powerful than
14## the simple "find ... | xargs grep ....". Has a full man page.
15## Powerfully customizable.
16##
17## This file is big, but mostly comments and man page.
18##
19## See man page for usage info.
20## Return value: 2=error, 1=nothing found, 0=something found.
21##
22
$version = "950918.5";
##
## "950918.5";
##	Changed all 'sysread' to 'read' because Linux perl's don't seem
##	to like sysread()
##
## "941227.4";
##	Added -n, -u
##
## "941222.3"
##	Added -nice (due to Lionel Cons <Lionel.Cons@cern.ch>)
##	Removed any leading "./" from name.
##	Added default flags for ~/.search, including TTY, -nice, -list, etc.
##	Program name now has path removed when printed in diagnostics.
##	Added simple tilde-expansion to -dir arg.
##	Added -dskip, etc. Fixed -iregex bug.
##	Changed -dir to be additive, adding -ddir.
##	Now screen out devices, pipes, and sockets.
##	More tidying and lots of expanding of the man page
##
##
## "941217.2";
##	initial release.

## NOTE: &strip rewrites the exact text of the next statement when it
## emits a stripped copy of this program, so keep its spelling as is.
$stripped=0;

## Set every option variable to its default before anything else runs.
&init;

## The startup file lives in the user's home directory, if there is one.
## With no HOME, the empty name makes &read_rc fall back to its built-in
## defaults.
$rc_file = exists $ENV{'HOME'} ? join('/', $ENV{'HOME'}, ".search") : "";

## Digest the command-line options; what is left in @ARGV are the regexes.
&check_args;

## Make sure we've got a regex.
## Don't need one if -find or -showrc was specified.
if ($FIND_ONLY == 0 && $showrc == 0 && @ARGV == 0) {
    $! = 2;
    die "expecting regex arguments.\n";
}

&prepare_to_search($rc_file);

## BIG key to speed: &dodir is built at run time with the option values
## inlined, unless it has already been defined.
unless (defined &dodir) {
    &import_program;
}

## do search while there are directories to be done.
while (@todo) {
    &dodir(shift(@todo));
}

if ($VERBOSE && $STDERR_IS_TTY) {
    &clear_message;
}
exit($retval);
73###############################################################################
74
##
## Set every global that the rest of the program relies on to its default
## value.  Takes no arguments and returns nothing useful.  Must run before
## &check_args, which overrides these defaults from the command line.
##
sub init
{
    ## initialize variables that might be reset by command-line args
    $DOREP=0;            ## set true by -dorep (redo multi-hardlink files)
    $DOREP=1 if $^O eq 'MSWin32';   ## always redo on Windows
    $DO_SORT=0;          ## set by -sort (sort files in a dir before checking)
    $FIND_ONLY=0;        ## set by -find (don't search files)
    $LIST_ONLY=0;        ## set true by -l (list filenames only)
    $NEWER=0;            ## set by -newer, "-mtime -###"
    $NICE=0;             ## set by -nice (print human-readable output)
    $NOLINKS=0;          ## set true by -nolinks (don't follow symlinks)
    $OLDER=0;            ## set by -older, "-mtime ###"
    $PREPEND_FILENAME=1; ## set false by -h (don't prefix lines with filename)
    $REPORT_LINENUM=0;   ## set true by -n (show line numbers)
    $VERBOSE=0;          ## set to a value by -v, -vv, etc. (verbose messages)
    $WHY=0;              ## set true by -why, -vvv+ (report why skipped)
    $XDEV=0;             ## set true by -xdev (stay on one filesystem)
    $all=0;              ## set true by -all (don't skip many kinds of files)
    $iflag = '';         ## set to 'i' by -i (ignore case);
    $norc=0;             ## set by -norc (don't load rc file)
    $showrc=0;           ## set by -showrc (show what happens with rc file)
    $underlineOK=0;      ## set true by -u (watch for underline stuff)
    $words=0;            ## set true by -w (match whole-words only)
    $DELAY=0;            ## inter-file delay (seconds)
    $retval=1;           ## will set to 0 if we find anything.

    ## various elements of stat() that we might access
    ## NOTE(review): perl's stat() list has the device at index 0 and the
    ## inode at index 1, so these two select (inode, mode) rather than
    ## (device, inode).  Left unchanged here because &prepare_to_search
    ## and &dodir key their tables on these values -- confirm and fix all
    ## of them together.
    $STAT_DEV   = 1;
    $STAT_INODE = 2;
    $STAT_MTIME = 9;

    $VV_PRINT_COUNT = 50;      ## with -vv, print every VV_PRINT_COUNT files, or...
    $VV_SIZE = 1024*1024;      ## ...every VV_SIZE bytes searched
    $vv_print = $vv_size = 0;  ## running totals.

    ## set default options, in case the rc file wants them
    $opt{'TTY'}= 1 if -t STDOUT;

    ## want to know this for debugging message stuff
    $STDERR_IS_TTY = -t STDERR ? 1 : 0;
    $STDERR_SCREWS_STDOUT = ($STDERR_IS_TTY && -t STDOUT) ? 1 : 0;

    ## clean up $0 for any diagnostics we'll be printing.  On Windows the
    ## directory part may be separated by backslashes as well as slashes.
    if ($^O eq 'MSWin32') {
        $0 =~ s,.*[/\\],,;
    } else {
        $0 =~ s,.*/,,;
    }
}
119
##
## Check arguments.
##
## Consumes leading option arguments (anything beginning with '-') from
## @ARGV, setting the matching global flags and filling %opt, %skip,
## %iskip, %dskip, %idskip, %name, %iname, %user_opt and @todo.  Stops at
## the first non-option argument, or right after -e, leaving the remaining
## arguments (the regexes) in @ARGV.  Dies with $! set to 2 on an unknown
## option or a missing/bad option argument.  -help and -version print
## their text and exit.  Takes no parameters; &read_rc also calls this
## with @ARGV and $0 localized to process 'option:' lines.
##
sub check_args
{
    while (@ARGV && $ARGV[0] =~ m/^-/)
    {
        $arg = shift(@ARGV);

        if ($arg eq '-version' || ($VERBOSE && $arg eq '-help')) {
            print qq/Jeffrey's file search, version "$version".\n/;
            exit(0) unless $arg eq '-help';
        }
        if ($arg eq '-help') {
            print <<INLINE_LITERAL_TEXT;
usage: $0 [options] [-e] [PerlRegex ....]
OPTIONS TELLING *WHERE* TO SEARCH:
 -dir DIR start search at the named directory (default is current dir).
 -xdev stay on starting file system.
 -sort sort the files in each directory before processing.
 -nolinks don't follow symbolic links.
OPTIONS TELLING WHICH FILES TO EVEN CONSIDER:
 -mtime # consider files modified > # days ago (-# for < # days old)
 -newer FILE consider files modified more recently than FILE (also -older)
 -name GLOB consider files whose name matches pattern (also -regex).
 -skip GLOB opposite of -name: identifies files to not consider.
 -path GLOB like -name, but for files whose whole path is described.
 -dpath/-dregex/-dskip versions for selecting or pruning directories.
 -all don't skip any files marked to be skipped by the startup file.
 -x<SPECIAL> (see manual, and/or try -showrc).
 -why report why a file isn't checked (also implied by -vvvv).
OPTIONS TELLING WHAT TO DO WITH FILES THAT WILL BE CONSIDERED:
 -f | -find just list files (PerlRegex ignored). Default is to grep them.
 -ff | -ffind Does a faster -find (implies -find -all -dorep)
OPTIONS CONTROLLING HOW THE SEARCH IS DONE (AND WHAT IS PRINTED):
 -l | -list only list files with matches, not the lines themselves.
 -nice | -nnice print more "human readable" output.
 -n prefix each output line with its line number in the file.
 -h don't prefix output lines with file name.
 -u also look "inside" manpage-style underlined text
 -i do case-insensitive searching.
 -w match words only (as defined by perl's \\b).
OTHER OPTIONS:
 -v, -vv, -vvv various levels of message verbosity.
 -e end of options (in case a regex looks like an option).
 -showrc show what the rc file sets, then exit.
 -norc don't load the rc file.
 -dorep check files with multiple hard links multiple times.
INLINE_LITERAL_TEXT
            print "Use -v -help for more verbose help.\n" unless $VERBOSE;
            print "This script file is also a man page.\n" unless $stripped;
            print <<INLINE_LITERAL_TEXT if $VERBOSE;

If -f (or -find) given, PerlRegex is optional and ignored.
Otherwise, will search for files with lines matching any of the given regexes.

Combining things like -name and -mtime implies boolean AND.
However, duplicating things (such as -name '*.c' -name '*.txt') implies OR.

-mtime may be given floating point (i.e. 1.5 is a day and a half).
-iskip/-idskip/-ipath/... etc are case-insensitive versions.

If any letter in -newer/-older is upper case, "or equal" is
inserted into the test.

You can always find the latest version on the World Wide Web in
 http://www.wg.omron.co.jp/~jfriedl/perl/
INLINE_LITERAL_TEXT
            exit(0);
        }

        ## simple on/off switches
        $DOREP=1,                     next if $arg eq '-dorep';   ## do repeats
        $DO_SORT=1,                   next if $arg eq '-sort';    ## sort files
        $NOLINKS=1,                   next if $arg eq '-nolinks'; ## no sym. links
        $PREPEND_FILENAME=0,          next if $arg eq '-h';       ## no filename prefix
        $REPORT_LINENUM=1,            next if $arg eq '-n';       ## show line numbers
        $WHY=1,                       next if $arg eq '-why';     ## tell why skipped
        $XDEV=1,                      next if $arg eq '-xdev';    ## don't leave F.S.
        $all=1,$opt{'-all'}=1,        next if $arg eq '-all';     ## don't skip *.Z, etc
        $iflag='i',                   next if $arg eq '-i';       ## ignore case
        $norc=1,                      next if $arg eq '-norc';    ## don't load rc file
        $showrc=1,                    next if $arg eq '-showrc';  ## show rc file
        $underlineOK=1,               next if $arg eq '-u';       ## look through underline
        $words=1,                     next if $arg eq '-w';       ## match "words" only
        &strip                             if $arg eq '-strip';   ## dump this program
        last                               if $arg eq '-e';

        ## -delayN: anchored so that it cannot swallow something like
        ## a -x user option that merely contains the text "-delay9".
        $DELAY=$1, next if $arg =~ m/^-delay(\d+)$/;

        $FIND_ONLY=1, next if $arg =~ /^-f(ind)?$/;   ## do "find" only

        $FIND_ONLY=1, $DOREP=1, $all=1,
                      next if $arg =~ /^-ff(ind)?$/;  ## fast -find
        $LIST_ONLY=1, $opt{'-list'}=1,
                      next if $arg =~ /^-l(ist)?$/;   ## only list files

        if ($arg =~ m/^-(v+)$/) { ## verbosity
            $VERBOSE = length($1);
            ## -vv also counts as -v for startup-file tags, and so on.
            foreach my $len (1..$VERBOSE) { $opt{'-'.('v' x $len)}=1 }
            next;
        }
        if ($arg =~ m/^-(n+)ice$/) { ## "nice" output
            $NICE = length($1);
            foreach my $len (1..$NICE) { $opt{'-'.('n' x $len).'ice'}=1 }
            next;
        }

        ## -skip, -iskip, -dskip, -idskip: whitespace-separated globs.
        if ($arg =~ m/^-(i?)(d?)skip$/) {
            my $nocase  = $1 eq 'i';
            my $for_dir = $2 eq 'd';
            $! = 2, die qq/$0: expecting glob arg to $arg\n/ unless @ARGV;
            foreach (split(/\s+/, shift @ARGV)) {
                if ($for_dir) {
                    $idskip{$_}=1 if $nocase;
                    $dskip{$_}=1;
                } else {
                    $iskip{$_}=1 if $nocase;
                    $skip{$_}=1;
                }
            }
            next;
        }

        ## -name/-path/-regex and their -i and -d variants.  Each pattern
        ## is remembered under the key "OPTION,PATTERN" (the option keeps
        ## its leading dash), to be sorted out by &prepare_to_search.
        if ($arg =~ m/^-(i?)(d?)(regex|path|name)$/) {
            my $nocase = $1 eq 'i';
            $! = 2, die qq/$0: expecting arg to $arg\n/ unless @ARGV;
            foreach (split(/\s+/, shift @ARGV)) {
                $iname{join(',', $arg, $_)}=1 if $nocase;
                $name{join(',', $arg, $_)}=1;
            }
            next;
        }

        ## -dir adds a starting directory; -ddir replaces those so far.
        if ($arg =~ m/^-d?dir$/) {
            $opt{'-dir'}=1;
            $! = 2, die qq/$0: expecting filename arg to $arg\n/ unless @ARGV;
            $start = shift(@ARGV);
            ## simple tilde expansion
            $start =~ s#^~(/+|$)#$ENV{'HOME'}$1# if defined $ENV{'HOME'};
            $! = 2, die qq/$0: can't find ${arg}'s "$start"\n/ unless -e $start;
            $! = 2, die qq/$0: ${arg}'s "$start" not a directory.\n/ unless -d _;
            undef(@todo), $opt{'-ddir'}=1 if $arg eq '-ddir';
            push(@todo, $start);
            next;
        }

        ## -newer/-older FILE.  Any upper-case letter in the option name
        ## turns the test into "or equal".
        if ($arg =~ m/^-(new|old)er$/i) {
            $! = 2, die "$0: expecting filename arg to $arg\n" unless @ARGV;
            my $file = shift(@ARGV);
            my $time;
            $! = 2, die qq/$0: can't stat ${arg}'s "$file".\n/
                unless $time = (stat($file))[$STAT_MTIME];
            my $upper = ($arg =~ tr/A-Z//);
            if ($arg =~ m/new/i) {
                $time++ unless $upper;
                $NEWER = $time if $NEWER < $time;
            } else {
                $time-- unless $upper;
                $OLDER = $time if $OLDER == 0 || $OLDER > $time;
            }
            next;
        }

        ## -mtime DAYS: negative means "newer than", positive "older than".
        if ($arg =~ m/^-mtime$/) {
            $! = 2, die "$0: expecting numerical arg to $arg\n" unless @ARGV;
            my $days = shift(@ARGV);
            $! = 2, die qq/$0: inappropriate arg ($days) to $arg\n/ if $days==0;
            $days *= 3600 * 24;
            if ($days < 0) {
                my $time = $^T + $days;
                $NEWER = $time if $NEWER < $time;
            } else {
                my $time = $^T - $days;
                $OLDER = $time if $OLDER == 0 || $OLDER > $time;
            }
            next;
        }

        ## special user options
        if ($arg =~ m/^-x(.+)/) {
            foreach (split(/[\s,]+/, $1)) { $user_opt{$_} = $opt{$_}= 1; }
            next;
        }

        $! = 2, die "$0: unknown arg [$arg]\n";
    }
}
304
##
## Given a filename glob, return a regex (a string anchored with ^ and $).
##    *      becomes  .*
##    ?      becomes  .      (exactly one character)
##    [..]   is passed through as is
##    \X     becomes a literal X
## Everything else is literal text, with non-word characters quoted.
## If the glob has no globbing chars (no * ? or [..]), then
## prepend an effective '*' to it.
##
sub glob_to_regex
{
    my($glob) = @_;

    ## Split into: backslash-escaped chars, lone * or ?, bracketed
    ## classes, and runs of plain text.
    my(@parts) = $glob =~ m/\\.|[*?]|\[]?[^]]*]|[^[\\*?]+/g;
    my($trueglob) = 0;
    foreach (@parts) {
        if ($_ eq '*') {
            $_ = '.*';
            $trueglob = 1;      ## * is a real glob
        } elsif ($_ eq '?') {
            $_ = '.';           ## one character, not "zero or one"
            $trueglob = 1;      ## ? is a real glob
        } elsif (substr($_, 0, 1) eq '[') {
            $trueglob = 1;      ## [..] is a real glob
        } else {
            s/^\\//;            ## remove any leading backslash;
            s/(\W)/\\$1/g;      ## now quote anything dangerous;
        }
    }
    unshift(@parts, '.*') unless $trueglob;
    return join('', '^', @parts, '$');
}
329
##
## Turn everything gathered from the command line (and, unless -norc, the
## startup file named by the one argument) into the test strings and
## flags that &dodir uses:
##    $SKIP_TEST/$DO_SKIP_TEST, $DSKIP_TEST/$DO_DSKIP_TEST,
##    $GLOB_TESTS/$DO_GLOB_TESTS, $DGLOB_TESTS/$DO_DGLOB_TESTS,
##    $MAGIC_TESTS/$DO_MAGIC_TESTS, $REGEX_TEST, $CAN_USE_FAST_LISTONLY,
##    $NEXT_DIR_ENTRY, and the %dir_done / %xdev tables.
## Consumes the regexes left in @ARGV.  Exits after the startup file has
## been shown if -showrc was given.  Dies with $! set to 2 on error.
##
sub prepare_to_search
{
    my($rc_file) = @_;

    $HEADER_BYTES=0;          ## Might be set nonzero in &read_rc;
    $last_message_length = 0; ## For &message and &clear_message.

    &read_rc($rc_file, $showrc) unless $norc;
    exit(0) if $showrc;

    $NEXT_DIR_ENTRY = $DO_SORT ? 'shift @files' : 'readdir(DIR)';
    $WHY = 1 if $VERBOSE > 3; ## Arg -vvvv or above implies -why.
    @todo = ('.') if @todo == 0; ## Where we'll start looking

    ## see if any user options were specified that weren't accounted for
    foreach my $user (keys %user_opt) {
        next if defined $seen_opt{$user};
        warn "warning: -x$user never considered.\n";
    }

    $! = 2, die "$0: multiple time constraints exclude all possible files.\n"
        if ($NEWER && $OLDER) && ($NEWER > $OLDER);

    ##
    ## Process any -skip/-iskip args that had been given
    ##
    my(@skip_test);
    foreach my $glob (keys %skip) {
        my $i = defined($iskip{$glob}) ? 'i': '';
        push(@skip_test, '$name =~ m/'. &glob_to_regex($glob). "/$i");
    }
    if (@skip_test) {
        $SKIP_TEST = join('||',@skip_test);
        $DO_SKIP_TEST = 1;
    } else {
        $DO_SKIP_TEST = $SKIP_TEST = 0;
    }

    ##
    ## Process any -dskip/-idskip args that had been given
    ##
    my(@dskip_test);
    foreach my $glob (keys %dskip) {
        my $i = defined($idskip{$glob}) ? 'i': '';
        push(@dskip_test, '$name =~ m/'. &glob_to_regex($glob). "/$i");
    }
    if (@dskip_test) {
        $DSKIP_TEST = join('||',@dskip_test);
        $DO_DSKIP_TEST = 1;
    } else {
        $DO_DSKIP_TEST = $DSKIP_TEST = 0;
    }

    ##
    ## Process any -name, -path, -regex, etc. args that had been given.
    ## Each key of %name is "OPTION,PATTERN", where OPTION is the option
    ## as typed (leading dash and any i/d prefix included).
    ##
    undef @name_test;
    undef @dname_test;
    foreach $key (keys %name) {
        my($type, $pat) = split(/,/, $key, 2);
        my $i = defined($iname{$key}) ? 'i' : '';
        if ($type =~ /regex/) {
            $pat =~ s/!/\\!/g;
            $test = "\$name =~ m!^$pat\$!$i";
        } else {
            ## -name (and -iname, -dname, -idname) looks at the entry's
            ## own name; the -path family looks at the whole path.
            ## $type still has its dash, so compare the tail only.
            my $var = ($type =~ m/name$/) ? '$name' : '$file';
            $test = "$var =~ m/". &glob_to_regex($pat). "/$i";
        }
        if ($type =~ m/^-i?d/) {
            push(@dname_test, $test);
        } else {
            push(@name_test, $test);
        }
    }
    if (@name_test) {
        $GLOB_TESTS = join('||', @name_test);
        $DO_GLOB_TESTS = 1;
    } else {
        $GLOB_TESTS = $DO_GLOB_TESTS = 0;
    }
    if (@dname_test) {
        $DGLOB_TESTS = join('||', @dname_test);
        $DO_DGLOB_TESTS = 1;
    } else {
        $DGLOB_TESTS = $DO_DGLOB_TESTS = 0;
    }

    ##
    ## Process any 'magic' things from the startup file.
    ##
    if (@magic_tests && $HEADER_BYTES) {
        ## the $magic' one is for when &dodir is not inlined
        $tests = join('||',@magic_tests);
        $MAGIC_TESTS = " { package magic; \$val = ($tests) }";
        $DO_MAGIC_TESTS = 1;
    } else {
        $MAGIC_TESTS = 1;
        $DO_MAGIC_TESTS = 0;
    }

    ##
    ## Prepare regular expressions.
    ##
    {
        my(@regex_tests);

        ## Always defined, so that it can be interpolated below without
        ## an "uninitialized" warning when -list is not in effect.
        $mflag = '';
        if ($LIST_ONLY) {
            ## need to have $* set, but perl5 just won''t shut up about it.
            if ($] >= 5) {
                $mflag = 'm';
            } else {
                eval ' $* = 1 ';
            }
        }

        ##
        ## Until I figure out a better way to deal with it,
        ## We have to worry about a regex like [^xyz] when doing $LIST_ONLY.
        ## Such a regex *will* match \n, and if I'm pulling in multiple
        ## lines, it can allow lines to match that would otherwise not match.
        ##
        ## Therefore, if there is a '[^' in a regex, we can NOT take a chance
        ## and use the fast listonly.
        ##
        $CAN_USE_FAST_LISTONLY = $LIST_ONLY;

        my(@extra);
        ## Optional "underscore, backspace" between letters; non-capturing
        ## where perl supports it.
        my $underline_glue = ($] >= 5) ? '(?:_\cH)?' : '(_\cH)?';
        while (@ARGV) {
            $regex = shift(@ARGV);
            ##
            ## If watching for underlined things too, add another regex.
            ##
            if ($underlineOK) {
                if ($regex =~ m/[?*+{}()\\.|^\$[]/) {
                    warn "$0: warning, can't underline-safe ``$regex''.\n";
                } else {
                    $regex = join($underline_glue, split(//, $regex));
                }
            }

            ## If nothing special in the regex, just use index...
            ## is quite a bit faster.
            if (($iflag eq '') && ($words == 0) &&
                $regex !~ m/[?*+{}()\\.|^\$[]/)
            {
                push(@regex_tests, "(index(\$_, q+$regex+)>=0)");

            } else {
                ## The regex is pasted into m/.../ source text below, so
                ## protect every $, @ or / that is followed by a word
                ## character.  Anything already backslash-escaped is
                ## stepped over untouched.
                $regex =~ s#(\\.)|([\$\@/])(?=\w)#defined($1) ? $1 : "\\$2"#ge;
                if ($words) {
                    if ($regex =~ m/\|/) {
                        ## could be dangerous -- see if we can wrap in parens.
                        if ($regex =~ m/\\\d/) {
                            warn "warning: -w and a | in a regex is dangerous.\n"
                        } else {
                            $regex = join($regex, '(', ')');
                        }
                    }
                    $regex = join($regex, '\b', '\b');
                }
                ## Anything that can match a newline rules out scanning
                ## several lines at once: a negated class, or one of the
                ## escapes \s \D \W \n.
                $CAN_USE_FAST_LISTONLY = 0 if $regex =~ m/\[\^|\\[sDWn]/;
                push(@regex_tests, "m/$regex/$iflag$mflag");
            }

            ## If we're done, but still have @extra to do, get set for that.
            if (@ARGV == 0 && @extra) {
                @ARGV = @extra;   ## now deal with the extra stuff.
                $underlineOK = 0; ## but no more of this.
                undef @extra;     ## or this.
            }
        }
        if (@regex_tests) {
            $REGEX_TEST = join('||', @regex_tests);
            ## print STDERR $REGEX_TEST, "\n"; exit;
        } else {
            ## must be doing -find -- just give something syntactically correct.
            $REGEX_TEST = 1;
        }
    }

    ##
    ## Make sure we can read the first item(s).
    ##
    ## NOTE(review): a repeated starting directory is only reported here;
    ## it stays in @todo and is still handed to &dodir.
    ##
    foreach $start (@todo) {
        $! = 2, die qq/$0: can't stat "$start"\n/
            unless ($dev,$inode) = (stat($start))[$STAT_DEV,$STAT_INODE];

        if (defined $dir_done{"$dev,$inode"}) {
            ## ignore the repeat.
            warn(qq/ignoring "$start" (same as "$dir_done{"$dev,$inode"}").\n/)
                if $VERBOSE;
            next;
        }

        ## if -xdev was given, remember the device.
        $xdev{$dev} = 1 if $XDEV;

        ## Note that we won't want to do it again
        $dir_done{"$dev,$inode"} = $start;
    }
}
536
537
##
## Report an internal error (the arguments are shown in parentheses) on
## STDERR and exit with status 2.  Used by &import_program when a variable
## that should be inlined has no value.
##
sub bad {
    print STDERR "$0: internal error (@_)\n";
    exit 2;
}

##
## Define &dodir at run time.
##
## The text following the script's own end marker is read from DATA, up
## to the next such marker.  Every word 'eval' is removed from it, and
## every reference to a $VARIABLE whose name is three or more characters
## of upper case, digits and underscore (starting with a letter) is
## replaced by that variable's current value.  The result is then
## compiled.  Takes no arguments; dies with $! set to 2 if the text does
## not compile.
##
sub import_program
{
    ## Read from data, up to next __END__. This will be &dodir.
    local($/) = "\n__END__";
    $prog = <DATA>;
    close(DATA);

    ## remove any 'eval'
    $prog =~ s/\beval\b//g;

    ## Inline uppercase $-variables by their current values.
    if ($] < 5) {
        ## perl4 reaches the symbol table through %_main.
        $prog =~ s/\$([A-Z][A-Z0-9_]{2,}\b)/local(*VAR) = $_main{$1};
                  &bad($1) if !defined $VAR; $VAR;/eg;
    } else {
        $prog =~ s/\$([A-Z][A-Z0-9_]{2,}\b)/
                  defined(${$main::{$1}}) ? ${$main::{$1}} : &bad($1)/eg;
    }

    ## now do it. This will define &dodir;
    eval $prog;
    if ($@) {
        $! = 2;
        die "$0 internal error: $@\n";
    }
}
567
568###########################################################################
569
570##
571## Read the .search file:
572## Blank lines and lines that are only #-comments ignored.
573## Newlines may be escaped to create long lines
574## Other lines are directives.
575##
576## A directive may begin with an optional tag in the form <...>
577## Things inside the <...> are evaluated as with:
578## <(this || that) && must>
579## will be true if
580## -xmust -xthis or -xmust -xthat
581## were specified on the command line (order doesn't matter, though)
582## A directive is not done if there is a tag and it's false.
583## Any characters but whitespace and &|()>,! may appear after an -x
584## (although "-xdev" is special). -xmust,this is the same as -xmust -xthis.
585## Something like -x~ would make <~> true, and <!~> false.
586##
587## Directives are in the form:
588## option: STRING
589## magic : NUMBYTES : EXPR
590##
591## With option:
592## The STRING is parsed like a Bourne shell command line, and the
593## options are used as if given on the command line.
594## No comments are allowed on 'option' lines.
595## Examples:
596## # skip objects and libraries
597## option: -skip '.o .a'
598## # skip emacs *~ and *# files, unless -x~ given:
599## <!~> option: -skip '~ #'
600##
601## With magic:
602## EXPR can be pretty much any perl (comments allowed!).
603## If it evaluates to true for any particular file, it is skipped.
604## The only info you'll have about a file is the variable $H, which
605## will have at least the first NUMBYTES of the file (less if the file
606## is shorter than that, of course, and maybe more). You'll also have
607## any variables you set in previous 'magic' lines.
608## Examples:
609## magic: 6 : ($x6 = substr($H, 0, 6)) eq 'GIF87a'
610## magic: 6 : $x6 eq 'GIF89a'
611##
612## magic: 6 : (($x6 = substr($H, 0, 6)) eq 'GIF87a' ## old gif \
613## || $x6 eq 'GIF89a' ## new gif
614## (the above two sets are the same)
615## ## Check the first 32 bytes for "binarish" looking bytes.
616## ## Don't blindly dump on any high-bit set, as non-ASCII text
617## ## often has them set. \x80 and \xff seem to be special, though.
618## ## Require two in a row to not get things like perl's $^T.
619## ## This is known to get *.Z, *.gz, pkzip, *.elc and about any
620## ## executable you'll find.
621## magic: 32 : $H =~ m/[\x00-\x06\x10-\x1a\x1c-\x1f\x80\xff]{2}/
622##
##
## Read the startup file and act on its directives.
##    $file  name of the startup file; if it cannot be opened, a
##           built-in default list of directives is used instead.
##    $show  if true, print what each directive is (for -showrc) and
##           keep going even for directives whose tag is false.
## 'option:' lines are fed to &check_args; 'magic:' lines are compile-
## checked, appended to @magic_tests and may raise $HEADER_BYTES.
## Dies with $! set to 2 on a malformed line.
##
sub read_rc
{
    ## These stay dynamically scoped: $bytes below is reached from
    ## eval'ed text by its package name.
    local($file, $show) = @_;
    local($line_num, $ln, $tag) = 0;
    local($use_default, @default) = 0;

    { package magic; $^W= 0; } ## turn off warnings for when we run EXPR's

    unless (open(RC, '<', $file)) {
        $use_default=1;
        $file = "<internal default startup file>";
        ## no RC file -- use this default.
        @default = split(/\n/,<<'--------INLINE_LITERAL_TEXT');
 magic: 32 : $H =~ m/[\x00-\x06\x10-\x1a\x1c-\x1f\x80\xff]{2}/
 option: -skip '.a .elc .gz .o .pbm .xbm .dvi'
 option: -iskip '.com .exe .lib .pdb .tarz .zip .z .lzh .jpg .jpeg .gif .uu'
 <!~> option: -skip '~ #'
--------INLINE_LITERAL_TEXT
    }

    ##
    ## Make an eval error pretty.
    ##
    sub clean_eval_error {
        local($_) = @_;
        s/ in file \(eval\) at line \d+,//g; ## perl4-style error
        s/ at \(eval \d+\) line \d+,//g;     ## perl5-style error
        $_ = $` if m/\n/;                    ## remove all but first line
        "$_\n";
    }

    print "reading RC file: $file\n" if $show;

    while (defined($_ = ($use_default ? shift(@default) : <RC>))) {
        $ln = ++$line_num; ## note starting line num.

        ## allow continuations: a trailing backslash joins the next line,
        ## taken from wherever this line came from.  Stop quietly if
        ## there is no next line.
        while (s/\\\n?$/\n/) {
            my $more = $use_default ? shift(@default) : <RC>;
            last unless defined $more;
            $_ .= $more;
            $line_num++;
        }
        next if /^\s*(#.*)?$/; ## skip blank or comment-only lines.
        $do = '';

        ## look for an initial <...> tag.
        if (s/^\s*<([^>]*)>//) {
            ## This simple s// will make the tag ready to eval.
            ($tag = $msg = $1) =~
                s/[^\s&|(!)]+/
                    $seen_opt{$&}=1;         ## note seen option
                    "defined(\$opt{q>$&>})"  ## (q>> is safe quoting here)
                /eg;

            ## see if the tag is true or not, abort this line if not.
            $dothis = (eval $tag);
            $!=2, die "$file $ln <$msg>: $_".&clean_eval_error($@) if $@;

            if ($show) {
                ## Spell the tag out in words; every option name in it
                ## gets its -x prefix.
                $msg =~ s/[^\s&|(!)]+/-x$&/g;
                $msg =~ s/\s*!\s*/ no /g;
                $msg =~ s/\s*&&\s*/ and /g;
                $msg =~ s/\s*\|\|\s*/ or /g;
                $msg =~ s/^\s+//; $msg =~ s/\s+$//;
                $do = $dothis ? "(doing because $msg)" :
                                "(do if $msg)";
            } elsif (!$dothis) {
                next;
            }
        }

        if (m/^\s*option\s*:\s*/) {
            next if $all && !$show; ## -all turns off these checks;
            local($_) = $';
            s/\n$//;
            local($orig) = $_;
            print " $do option: $_\n" if $show;
            local($0) = "$0 ($file)"; ## for any error message.
            local(@ARGV);
            local($this);
            ##
            ## Parse $_ as a Bourne shell line -- fill @ARGV
            ##
            while (length) {
                if (s/^\s+//) {
                    ## whitespace ends the word being collected
                    push(@ARGV, $this) if defined $this;
                    undef $this;
                    next;
                }
                $this = '' if !defined $this;
                $this .= $1 while s/^'([^']*)'// ||
                                  s/^"([^"]*)"// ||
                                  s/^([^'"\s\\]+)//||
                                  s/^(\\[\D\d])//;
                $! = 2, die "$file $ln: error parsing $orig at $_\n" if m/^\S/;
            }
            push(@ARGV, $this) if defined $this;
            &check_args;
            $! = 2, die qq/$file $ln: unused arg "@ARGV".\n/ if @ARGV;
            next;
        }

        if (m/^\s*magic\s*:\s*(\d+)\s*:\s*/) {
            next if $all && !$show; ## -all turns off these checks;
            local($bytes, $check) = ($1, $');

            if ($show) {
                $check =~ s/\n?$/\n/;
                print " $do contents: $check";
            }
            ## Check to make sure the thing at least compiles.
            eval "package magic; (\$H = '1'x \$main::bytes) && (\n$check\n)\n";
            $! = 2, die "$file $ln: ".&clean_eval_error($@) if $@;

            $HEADER_BYTES = $bytes if $bytes > $HEADER_BYTES;
            push(@magic_tests, "(\n$check\n)");
            next;
        }
        $! = 2, die "$file $ln: unknown command\n";
    }
    close(RC) unless $use_default;
}
739
##
## Show a progress message (the first argument) on STDERR.
## When STDERR is not a terminal the text is simply printed on a line of
## its own.  On a terminal it is followed by a carriage return rather
## than a newline, so the next message overwrites it; if it is shorter
## than the previous message it is padded with blanks to cover the
## leftovers.  The length shown is remembered in $last_message_length.
##
sub message
{
    unless ($STDERR_IS_TTY) {
        print STDERR $_[0], "\n";
        return;
    }

    my($text) = @_;
    $thislength = length($text);

    ## blanks needed to wipe out the tail of a longer, earlier message
    my $shortfall = $last_message_length - $thislength;
    my @pieces = ($text);
    push(@pieces, ' ' x $shortfall) if $shortfall > 0;

    print STDERR @pieces, "\r";
    $last_message_length = $thislength;
}
755
##
## Blank out whatever &message last left on the terminal line (if
## anything), and reset the message length and the -vv running totals.
##
sub clear_message
{
    if ($last_message_length) {
        print STDERR ' ' x $last_message_length, "\r";
    }
    $last_message_length = 0;
    $vv_size = 0;
    $vv_print = 0;
}
761
##
## Output a copy of this program with comments, extra whitespace, and
## the trailing man page removed. On an ultra slow machine, such a copy
## might load faster (but I can't tell any difference on my machine).
##
## Reads the program's own text through DATA (rewound to the start),
## prints the result on STDOUT and exits with status 0.  Here-document
## text is copied untouched.  Takes no arguments.
##
sub strip {
    seek(DATA, 0, 0) || die "$0: can't reset internal pointer.\n";
    while(<DATA>) {
        print, next if /INLINE_LITERAL_TEXT/.../INLINE_LITERAL_TEXT/;
        ## must mention INLINE_LITERAL_TEXT on this line!

        ## remove cruft -- each kind separately, so that a line with
        ## both leading whitespace and a trailing comment loses both.
        s/\s*\#\#.*//;  ## comment, and the blanks in front of it
        s/^\s+//;       ## leading whitespace
        s/\s+$//;       ## trailing whitespace, newline included

        last if $_ eq '.00;';           ## the man page starts here
        next if $_ eq '';
        next if m/^'(di|ig00)';?$/;     ## man-page trickery lines
        s/\$stripped=0;/\$stripped=1;/;
        s/\s\s+/ /;     ## squish multiple whitespaces down to one.
        print $_, "\n";
    }
    exit(0);
}
781
##
## Just to shut up -w. Never executed.
##
## Every name below is otherwise mentioned only in text that is compiled
## at run time.  The leading "1 ||" guarantees that nothing after it is
## ever evaluated, even if this were called.
##
sub dummy {

    1
      || $DELAY || $NEXT_DIR_ENTRY || $STDERR_SCREWS_STDOUT
      || $VV_PRINT_COUNT || $VV_SIZE
      || $magic'H || $magic'H || $xdev{''}
      || @files || @files
      || &bad || &clear_message || &dir_done || &dummy || &message;

}
792
793##
794## If the following __END__ is in place, what follows will be
795## inlined when the program first starts up. Any $ variable name
796## all in upper case, specifically, any string matching
797## \$([A-Z][A-Z0-9_]{2,}\b
798## will have the true value for that variable inlined. Also, any 'eval' is
799## removed
800##
801## The idea is that when the whole thing is then eval'ed to define &dodir,
802## the perl optimizer will make all the decisions that are based upon
803## command-line options (such as $VERBOSE), since they'll be inlined as
804## constants
805##
806## Also, and here's the big win, the tests for matching the regex, and a
807## few others, are all inlined. Should be blinding speed here.
808##
809## See the read from <DATA> above for where all this takes place.
810## But all-in-all, you *want* the __END__ here. Comment it out only for
811## debugging....
812##
813
814__END__
815
816##
817## Given a directory, check all "appropriate" files in it.
818## Shove any subdirectories into the global @todo, so they'll be done
819## later.
820##
821## Be careful about adding any upper-case variables, as they are subject
822## to being inlined. See comments above the __END__ above.
823##
824sub dodir
825{
826 local($dir) = @_;
827 $dir =~ s,/+$,,; ## remove any trailing slash.
828 unless (opendir(DIR, "$dir/.")) {
829 &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
830 warn qq($0: can't opendir "$dir/".\n);
831 return;
832 }
833
834 if ($VERBOSE) {
835 &message($dir);
836 $vv_print = $vv_size = 0;
837 }
838
839 @files = sort readdir(DIR) if $DO_SORT;
840
841 while (defined($name = eval $NEXT_DIR_ENTRY))
842 {
843 next if $name eq '.' || $name eq '..'; ## never follow these.
844
845 ## create full relative pathname.
846 $file = $dir eq '.' ? $name : "$dir/$name";
847
848 ## if link and skipping them, do so.
849 if ($NOLINKS && -l $file) {
850 warn qq/skip (symlink): $file\n/ if $WHY;
851 next;
852 }
853
854 ## skip things unless files or directories
855 unless (-f $file || -d _) {
856 if ($WHY) {
857 $why = (-S _ && "socket") ||
858 (-p _ && "pipe") ||
859 (-b _ && "block special")||
860 (-c _ && "char special") || "somekinda special";
861 warn qq/skip ($why): $file\n/;
862 }
863 next;
864 }
865
866 ## skip things we can't read
867 unless (-r _) {
868 if ($WHY) {
869 $why = (-l $file) ? "follow" : "read";
870 warn qq/skip (can't $why): $file\n/;
871 }
872 next;
873 }
874
875 ## skip things that are empty
1c1c7f20 876 unless (-s _ || -d _) {
d444a431
TB
877 warn qq/skip (empty): $file\n/ if $WHY;
878 next;
879 }
880
881 ## Note file device & inode. If -xdev, skip if appropriate.
882 ($dev, $inode) = (stat(_))[$STAT_DEV, $STAT_INODE];
883 if ($XDEV && defined $xdev{$dev}) {
884 warn qq/skip (other device): $file\n/ if $WHY;
885 next;
886 }
887 $id = "$dev,$inode";
888
889 ## special work for a directory
890 if (-d _) {
891 ## Do checks for directory file endings.
892 if ($DO_DSKIP_TEST && (eval $DSKIP_TEST)) {
893 warn qq/skip (-dskip): $file\n/ if $WHY;
894 next;
895 }
896 ## do checks for -name/-regex/-path tests
897 if ($DO_DGLOB_TESTS && !(eval $DGLOB_TESTS)) {
898 warn qq/skip (dirname): $file\n/ if $WHY;
899 next;
900 }
901
902 ## _never_ redo a directory
1c1c7f20 903 if (defined $dir_done{$id} and $^O ne 'MSWin32') {
d444a431
TB
904 warn qq/skip (did as "$dir_done{$id}"): $file\n/ if $WHY;
905 next;
906 }
907 $dir_done{$id} = $file; ## mark it done.
908 unshift(@todo, $file); ## add to the list to do.
909 next;
910 }
911 if ($WHY == 0 && $VERBOSE > 1) {
912 if ($VERBOSE>2||$vv_print++>$VV_PRINT_COUNT||($vv_size+=-s _)>$VV_SIZE){
913 &message($file);
914 $vv_print = $vv_size = 0;
915 }
916 }
917
918 ## do time-related tests
919 if ($NEWER || $OLDER) {
920 $_ = (stat(_))[$STAT_MTIME];
921 if ($NEWER && $_ < $NEWER) {
922 warn qq/skip (too old): $file\n/ if $WHY;
923 next;
924 }
925 if ($OLDER && $_ > $OLDER) {
926 warn qq/skip (too new): $file\n/ if $WHY;
927 next;
928 }
929 }
930
931 ## do checks for file endings
932 if ($DO_SKIP_TEST && (eval $SKIP_TEST)) {
933 warn qq/skip (-skip): $file\n/ if $WHY;
934 next;
935 }
936
937 ## do checks for -name/-regex/-path tests
938 if ($DO_GLOB_TESTS && !(eval $GLOB_TESTS)) {
939 warn qq/skip (filename): $file\n/ if $WHY;
940 next;
941 }
942
943
944 ## If we're not repeating files,
945 ## skip this one if we've done it, or note we're doing it.
946 unless ($DOREP) {
947 if (defined $file_done{$id}) {
948 warn qq/skip (did as "$file_done{$id}"): $file\n/ if $WHY;
949 next;
950 }
951 $file_done{$id} = $file;
952 }
953
954 if ($DO_MAGIC_TESTS) {
955 if (!open(FILE_IN, $file)) {
956 &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
957 warn qq/$0: can't open: $file\n/;
958 next;
959 }
960 unless (read(FILE_IN, $magic'H, $HEADER_BYTES)) {
961 &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
962 warn qq/$0: can't read from "$file"\n"/;
963 close(FILE_IN);
964 next;
965 }
966
967 eval $MAGIC_TESTS;
968 if ($magic'val) {
969 close(FILE_IN);
970 warn qq/skip (magic): $file\n/ if $WHY;
971 next;
972 }
973 seek(FILE_IN, 0, 0); ## reset for later <FILE_IN>
974 }
975
976 if ($WHY != 0 && $VERBOSE > 1) {
977 if ($VERBOSE>2||$vv_print++>$VV_PRINT_COUNT||($vv_size+=-s _)>$VV_SIZE){
978 &message($file);
979 $vv_print = $vv_size = 0;
980 }
981 }
982
983 if ($DELAY) {
984 sleep($DELAY);
985 }
986
987 if ($FIND_ONLY) {
988 &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
989 print $file, "\n";
990 $retval=0; ## we've found something
991 close(FILE_IN) if $DO_MAGIC_TESTS;
992 next;
993 } else {
994 ## if we weren't doing magic tests, file won't be open yet...
995 if (!$DO_MAGIC_TESTS && !open(FILE_IN, $file)) {
996 &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
997 warn qq/$0: can't open: $file\n/;
998 next;
999 }
1000 if ($LIST_ONLY && $CAN_USE_FAST_LISTONLY) {
1001 ##
1002 ## This is rather complex, but buys us a LOT when we're just
1003 ## listing files and not the individual internal lines.
1004 ##
1005 local($size) = 4096; ## block-size in which to do reads
1006 local($nl); ## will point to $_'s ending newline.
1007 local($read); ## will be how many bytes read.
1008 local($_) = ''; ## Starts out empty
1009 local($hold); ## (see below)
1010
1011 while (($read = read(FILE_IN,$_,$size,length($_)))||length($_))
1012 {
1013 undef @parts;
1014 ## if read a full block, but no newline, need to read more.
1015 while ($read == $size && ($nl = rindex($_, "\n")) < 0) {
1016 push(@parts, $_); ## save that part
1017 $read = read(FILE_IN, $_, $size); ## keep trying
1018 }
1019
1020 ##
1021 ## If we had to save parts, must now combine them together.
1022 ## adjusting $nl to reflect the now-larger $_. This should
1023 ## be a lot more efficient than using any kind of .= in the
1024 ## loop above.
1025 ##
1026 if (@parts) {
1027 local($lastlen) = length($_); #only need if $nl >= 0
1028 $_ = join('', @parts, $_);
1029 $nl = length($_) - ($lastlen - $nl) if $nl >= 0;
1030 }
1031
1032 ##
1033 ## If we're at the end of the file, then we can use $_ as
1034 ## is. Otherwise, we need to remove the final partial-line
1035 ## and save it so that it'll be at the beginning of the
1036 ## next read (where the rest of the line will be layed in
1037 ## right after it). $hold will be what we should save
1038 ## until next time.
1039 ##
1040 if ($read != $size || $nl < 0) {
1041 $hold = '';
1042 } else {
1043 $hold = substr($_, $nl + 1);
1044 substr($_, $nl + 1) = '';
1045 }
1046
1047 ##
1048 ## Now have a bunch of full lines in $_. Use it.
1049 ##
1050 if (eval $REGEX_TEST) {
1051 &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
1052 print $file, "\n";
1053 $retval=0; ## we've found something
1054
1055 last;
1056 }
1057
1058 ## Prepare for next read....
1059 $_ = $hold;
1060 }
1061
1062 } else { ## else not using faster block scanning.....
1063
1064 $lines_printed = 0 if $NICE;
1065 while (<FILE_IN>) {
1066 study;
1067 next unless (eval $REGEX_TEST);
1068
1069 ##
1070 ## We found a matching line.
1071 ##
1072 $retval=0;
1073 &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT;
1074 if ($LIST_ONLY) {
1075 print $file, "\n";
1076 last;
1077 } else {
1078 ## prepare to print line.
1079 if ($NICE && $lines_printed++ == 0) {
1080 print '-' x 70, "\n" if $NICE > 1;
1081 print $file, ":\n";
1082 }
1083
1084 ##
1085 ## Print all the prelim stuff. This looks less efficient
1086 ## than it needs to be, but that's so that when the eval
1087 ## is compiled (and the tests are optimized away), the
1088 ## result will be less actual PRINTs than the more natural
1089 ## way of doing these tests....
1090 ##
1091 if ($NICE) {
1092 if ($REPORT_LINENUM) {
1093 print " line $.: ";
1094 } else {
1095 print " ";
1096 }
1097 } elsif ($REPORT_LINENUM && $PREPEND_FILENAME) {
1098 print "$file,:$.: ";
1099 } elsif ($PREPEND_FILENAME) {
1100 print "$file: ";
1101 } elsif ($REPORT_LINENUM) {
1102 print "$.: ";
1103 }
1104 print $_;
1105 print "\n" unless m/\n$/;
1106 }
1107 }
1108 print "\n" if ($NICE > 1) && $lines_printed;
1109 }
1110 close(FILE_IN);
1111 }
1112 }
1113 closedir(DIR);
1114}
1115
1116__END__
1117.00; ## finish .ig
1118
1119'di \" finish diversion--previous line must be blank
1120.nr nl 0-1 \" fake up transition to first page again
1121.nr % 0 \" start at page 1
1122.\"__________________NORMAL_MAN_PAGE_BELOW_________________
1123.ll+10n
1124.TH search 1 "Dec 17, 1994"
1125.SH SEARCH
1126search \- search files (a'la grep) in a whole directory tree.
1127.SH SYNOPSIS
1128search [ grep-like and find-like options] [regex ....]
1129.SH DESCRIPTION
1130.I Search
1131is more or less a combo of 'find' and 'grep' (although the regular
1132expression flavor is that of the perl being used, which is closer to
1133egrep's than grep's).
1134
1135.I Search
1136does generally the same kind of thing that
1137.nf
1138 find <blah blah> | xargs egrep <blah blah>
1139.fi
1140does, but is
1141.I much
1142more powerful and efficient (and intuitive, I think).
1143
1144This manual describes
1145.I search
1146as of version "941227.4". You can always find the latest version at
1147.nf
1148 http://www.wg.omron.co.jp/~jfriedl/perl/index.html
1149.fi
1150
1151.SH "QUICK EXAMPLE"
1152Basic use is simple:
1153.nf
1154 % search jeff
1155.fi
1156will search files in the current directory, and all sub directories, for
1157files that have "jeff" in them. The lines will be listed with the
1158containing file's name prepended.
1159.PP
1160If you list more than one regex, such as with
1161.nf
1162 % search jeff Larry Randal+ 'Stoc?k' 'C.*son'
1163.fi
1164then a line containing any of the regexes will be listed.
1165This makes it effectively the same as
1166.nf
1167 % search 'jeff|Larry|Randal+|Stoc?k|C.*son'
1168.fi
1169However, listing them separately is much more efficient (and is easier
1170to type).
1171.PP
1172Note that in the case of these examples, the
1173.B \-w
1174(list whole-words only) option would be useful.
1175.PP
1176Normally, various kinds of files are automatically removed from consideration.
1177If it has a certain ending (such as ".tar", ".Z", ".o", etc.), or if
1178the beginning of the file looks like a binary, it'll be excluded.
1179You can control exactly how this works -- see below. One quick way to
1180override this is to use the
1181.B \-all
1182option, which means to consider all the files that would normally be
1183automatically excluded.
1184Or, if you're curious, you can use
1185.B \-why
1186to have notes about what files are skipped (and why) printed to stderr.
1187
1188.SH "BASIC OVERVIEW"
1189Normally, the search starts in the current directory, considering files in
1190all subdirectories.
1191
1192You can use the
1193.I ~/.search
1194file to control ways to automatically exclude files.
1195If you don't have this file, a default one will kick in, which automatically
1196adds
1197.nf
1198 -skip .o .Z .gif
1199.fi
1200(among others) to exclude those kinds of files (which you probably want to
1201skip when searching for text, as is normal).
1202Files that look to be binary will also be excluded.
1203
1204Files ending with "#" and "~" will also be excluded unless the
1205.B -x~
1206option is given.
1207
1208You can use
1209.B -showrc
1210to show what kinds of files will normally be skipped.
1211See the section on the startup file
1212for more info.
1213
1214You can use the
1215.B -all
1216option to indicate you want to consider all files that would otherwise be
1217skipped by the startup file.
1218
1219Based upon various other flags (see "WHICH FILES TO CONSIDER" below),
1220more files might be removed from consideration. For example
1221.nf
1222 -mtime 3
1223.fi
1224will exclude files that aren't at least three days old (change the 3 to -3
1225to exclude files that are more than three days old), while
1226.nf
1227 -skip .*
1228.fi
1229would exclude any file beginning with a dot (of course, '.' and '..' are
1230special and always excluded).
1231
1232If you'd like to see what files are being excluded, and why, you can get the
1233list via the
1234.B \-why
1235option.
1236
1237If a file makes it past all the checks, it is then "considered".
1238This usually means it is grepped for the regular expressions you gave
1239on the command line.
1240
1241If any of the regexes match a line, the line is printed.
1242However, if
1243.B -list
1244is given, just the filename is printed. Or, if
1245.B -nice
1246is given, a somewhat more (human-)readable output is generated.
1247
1248If you're searching a huge tree and want to keep informed about how
1249the search is progressing,
1250.B -v
1251will print (to stderr) the current directory being searched.
1252Using
1253.B -vv
1254will also print the current file "every so often", which could be useful
1255if a directory is huge. Using
1256.B -vvv
1257will print the update with every file.
1258
1259Below is the full listing of options.
1260
1261.SH "OPTIONS TELLING *WHERE* TO SEARCH"
1262.TP
1263.BI -dir " DIR"
1264Start searching at the named directory instead of the current directory.
1265If multiple
1266.B -dir
1267arguments are given, multiple trees will be searched.
1268.TP
1269.BI -ddir " DIR"
1270Like
1271.B -dir
1272except it flushes any previous
1273.B -dir
1274directories (i.e. "-dir A -dir B -dir C" will search A, B, and C, while
1275"-dir A -ddir B -dir C" will search only B and C). This might be of use
1276in the startup file (see that section below).
1277.TP
1278.B -xdev
1279Stay on the same filesystem as the starting directory/directories.
1280.TP
1281.B -sort
1282Sort the items in a directory before processing them.
1283Normally they are processed in whatever order they happen to be read from
1284the directory.
1285.TP
1286.B -nolinks
1287Don't follow symbolic links. Normally they're followed.
1288
1289.SH "OPTIONS CONTROLLING WHICH FILES TO CONSIDER AND EXCLUDE"
1290.TP
1291.BI -mtime " NUM"
1292Only consider files that were last changed more than
1293.I NUM
1294days ago
1295(less than
1296.I NUM
1297days if
1298.I NUM
1299has '-' prepended, i.e. "-mtime -2.5" means to consider files that
1300have been changed in the last two and a half days).
1301.TP
1302.BI -older " FILE"
1303Only consider files that have not changed since
1304.I FILE
1305was last changed.
1306If there is any upper case in the "-older", "or equal" is added to the sense
1307of the test. Therefore, "search -older ./file regex" will never consider
1308"./file", while "search -Older ./file regex" will.
1309
1310If a file is a symbolic link, the time used is that of the file and not the
1311link.
1312.TP
1313.BI -newer " FILE"
1314Opposite of
1315.BR -older .
1316.TP
1317.BI -name " GLOB"
1318Only consider files that match the shell filename pattern
1319.IR GLOB .
1320The check is only done on a file's name (use
1321.B -path
1322to check the whole path, and use
1323.B -dname
1324to check directory names).
1325
1326Multiple specifications can be given by separating them with spaces, a'la
1327.nf
1328 -name '*.c *.h'
1329.fi
1330to consider C source and header files.
1331If
1332.I GLOB
1333doesn't contain any special pattern characters, a '*' is prepended.
1334This last example could have been given as
1335.nf
1336 -name '.c .h'
1337.fi
1338It could also be given as
1339.nf
1340 -name .c -name .h
1341.fi
1342or
1343.nf
1344 -name '*.c' -name '*.h'
1345.fi
1346or
1347.nf
1348 -name '*.[ch]'
1349.fi
1350(among others)
1351but in this last case, you have to be sure to supply the leading '*'.
1352.TP
1353.BI -path " GLOB"
1354Like
1355.B -name
1356except the entire path is checked against the pattern.
1357.TP
1358.BI -regex " REGEX"
1359Considers files whose names (not paths) match the given perl regex
1360exactly.
1361.TP
1362.BI -iname " GLOB"
1363Case-insensitive version of
1364.BR -name .
1365.TP
1366.BI -ipath " GLOB"
1367Case-insensitive version of
1368.BR -path .
1369.TP
1370.BI -iregex " REGEX"
1371Case-insensitive version of
1372.BR -regex .
1373
1374.TP
1375.BI -dpath " GLOB"
1376Only search down directories whose path matches the given pattern (this
1377doesn't apply to the initial directory given by
1378.BI -dir ,
1379of course).
1380Something like
1381.nf
1382 -dir /usr/man -dpath /usr/man/man*
1383.fi
1384would completely skip
1385"/usr/man/cat1", "/usr/man/cat2", etc.
1386.TP
1387.BI -dskip " GLOB"
1388Skips directories whose name (not path) matches the given pattern.
1389Something like
1390.nf
1391 -dir /usr/man -dskip cat*
1392.fi
1393would completely skip any directory in the tree whose name begins with "cat"
1394(including "/usr/man/cat1", "/usr/man/cat2", etc.).
1395.TP
1396.BI -dregex " REGEX"
1397Like
1398.BI -dpath ,
1399but the pattern is a full perl regex. Note that this is quite different
1400from
1401.B -regex
1402which considers only file names (not paths). This option considers
1403full directory paths (not just names). It's much more useful this way.
1404Sorry if it's confusing.
1405.TP
1406.BI -dpath " GLOB"
1407This option exists, but is probably not very useful. It probably wants to
1408be like the '-below' or something I mention in the "TODO" section.
1409.TP
1410.BI -idpath " GLOB"
1411Case-insensitive version of
1412.BR -dpath .
1413.TP
1414.BI -idskip " GLOB"
1415Case-insensitive version of
1416.BR -dskip .
1417.TP
1418.BI -idregex " REGEX"
1419Case-insensitive version of
1420.BR -dregex .
1421.TP
1422.B -all
1423Ignore any 'magic' or 'option' lines in the startup file.
1424The effect is that all files that would otherwise be automatically
1425excluded are considered.
1426.TP
1427.BI -x SPECIAL
1428Arguments starting with
1429.B -x
1430(except
1431.BR -xdev ,
1432explained elsewhere) do special interaction with the
1433.I ~/.search
1434startup file. Something like
1435.nf
1436 -xflag1 -xflag2
1437.fi
1438will turn on "flag1" and "flag2" in the startup file (and is
1439the same as "-xflag1,flag2"). You can use this to write your own
1440rules for what kinds of files are to be considered.
1441
1442For example, the internal-default startup file contains the line
1443.nf
1444 <!~> option: -skip '~ #'
1445.fi
1446This means that if the
1447.B -x~
1448flag is
1449.I not
1450seen, the option
1451.nf
1452 -skip '~ #'
1453.fi
1454should be done.
1455The effect is that emacs temp and backup files are not normally
1456considered, but you can include them with the -x~ flag.
1457
1458You can write your own rules to customize
1459.I search
1460in powerful ways. See the STARTUP FILE section below.
1461.TP
1462.B -why
1463Print a message (to stderr) when and why a file is not considered.
1464
1465.SH "OPTIONS TELLING WHAT TO DO WITH FILES THAT WILL BE CONSIDERED"
1466.TP
1467.B -find
1468(you can use
1469.B -f
1470as well).
1471This option changes the basic action of
1472.IR search .
1473
1474Normally, if a file is considered, it is searched
1475for the regular expressions as described earlier. However, if this option
1476is given, the filename is printed and no searching takes place. This turns
1477.I search
1478into a 'find' of some sorts.
1479
1480In this case, no regular expressions are needed on the command line
1481(any that are there are silently ignored).
1482
1483This is not intended to be a replacement for the 'find' program,
1484but to aid
1485you in understanding just what files are getting past the exclusion checks.
1486If you really want to use it as a sort of replacement for the 'find' program,
1487you might want to use
1488.B -all
1489so that it doesn't waste time checking to see if the file is binary, etc
1490(unless you really want that, of course).
1491
1492If you use
1493.BR -find ,
1494none of the "GREP-LIKE OPTIONS" (below) matter.
1495
1496As a replacement for 'find',
1497.I search
1498is probably a bit slower (or in the case of GNU find, a lot slower --
1499GNU find is
1500.I unbelievably
1501fast).
1502However, "search -ffind"
1503might be more useful than 'find' when options such as
1504.B -skip
1505are used (at least until 'find' gets such functionality).
1506.TP
1507.B -ffind
1508(or
1509.BR -ff )
1510A faster more 'find'-like find. Does
1511.nf
1512 -find -all -dorep
1513.fi
1514.SH "GREP-LIKE OPTIONS"
1515These options control how a searched file is accessed,
1516and how things are printed.
1517.TP
1518.B -i
1519Ignore letter case when matching.
1520.TP
1521.B -w
1522Consider only whole-word matches ("whole word" as defined by perl's "\\b"
1523regex).
1524.TP
1525.B -u
1526If the regex(es) is/are simple, try to modify them so that they'll work
1527in manpage-like underlined text (i.e. like _^Ht_^Hh_^Hi_^Hs).
1528This is very rudimentary at the moment.
1529.TP
1530.B -list
1531(you can use
1532.B -l
1533too).
1534Don't print matching lines, but the names of files that contain matching
1535lines. This will likely be *much* faster, as special optimizations are
1536made -- particularly with large files.
1537.TP
1538.B -n
1539Prefix each line with its line number.
1540.TP
1541.B -nice
1542Not a grep-like option, but similar to
1543.BR -list ,
1544so included here.
1545.B -nice
1546will have the output be a bit more human-readable, with matching lines printed
1547slightly indented after the filename, a'la
1548.nf
1549
1550 % search foo
1551 somedir/somefile: line with foo in it
1552 somedir/somefile: some food for thought
1553 anotherdir/x: don't be a buffoon!
1554 %
1555
1556.fi
1557will become
1558.nf
1559
1560 % search -nice foo
1561 somedir/somefile:
1562 line with foo in it
1563 some food for thought
1564 anotherdir/x:
1565 don't be a buffoon!
1566 %
1567
1568.fi
1569This option due to Lionel Cons.
1570.TP
1571.B -nnice
1572Be a bit nicer than
1573.BR -nice .
1574Prefix each file's output by a rule line, and follow with an extra blank line.
1575.TP
1576.B -h
1577Don't prepend each output line with the name of the file
1578(meaningless when
1579.B -find
1580or
1581.B -l
1582are given).
1583
1584.SH "OTHER OPTIONS"
1585.TP
1586.B -help
1587Print the usage information.
1588.TP
1589.B -version
1590Print the version information and quit.
1591.TP
1592.B -v
1593Set the level of message verbosity.
1594.B -v
1595will print a note whenever a new directory is entered.
1596.B -vv
1597will also print a note "every so often". This can be useful to see
1598what's happening when searching huge directories.
1599.B -vvv
1600will print a note with every file.
1601.B -vvvv
1602is
1603-vvv
1604plus
1605.BR -why .
1606.TP
1607.B -e
1608This ends the options, and can be useful if the regex begins with '-'.
1609.TP
1610.B -showrc
1611Shows what is being considered in the startup file, then exits.
1612.TP
1613.B -dorep
1614Normally, an identical file won't be checked twice (even with multiple
1615hard or symbolic links). If you're just trying to do a fast
1616.BR -find ,
1617the bookkeeping to remember which files have been seen is not desirable,
1618so you can eliminate the bookkeeping with this flag.
1619
1620.SH "STARTUP FILE"
1621When
1622.I search
1623starts up, it processes the directives in
1624.IR ~/.search .
1625If no such file exists, a default
1626internal version is used.
1627
1628The internal version looks like:
1629.nf
1630
1631 magic: 32 : $H =~ m/[\ex00-\ex06\ex10-\ex1a\ex1c-\ex1f\ex80\exff]{2}/
1632 option: -skip '.a .COM .elc .EXE .gz .o .pbm .xbm .dvi'
1633 option: -iskip '.tarz .zip .z .lzh .jpg .jpeg .gif .uu'
1634 <!~> option: -skip '~ #'
1635
1636.fi
1637If you wish to create your own "~/.search",
1638you might consider copying the above, and then working from there.
1639
1640There are two kinds of directives in a startup file: "magic" and "option".
1641.RS 0n
1642.TP
1643OPTION
1644Option lines will automatically do the command-line options given.
1645For example, the line
1646.nf
1647 option: -v
1648.fi
1649in your startup file will turn on -v every time, without needing to type it
1650on the command line.
1651
1652The text on the line after the "option:" directive is processed
1653like the Bourne shell, so make sure to pay attention to quoting.
1654.nf
1655 option: -skip .exe .com
1656.fi
1657will give an error (".com" by itself isn't a valid option), while
1658.nf
1659 option: -skip ".exe .com"
1660.fi
1661will properly include it as part of -skip's argument.
1662
1663.TP
1664MAGIC
1665Magic lines are used to determine if a file should be considered a binary
1666or not (the term "magic" refers to checking a file's magic number). These
1667are described in more detail below.
1668.RE
1669
1670Blank lines and comments (lines beginning with '#') are allowed.
1671
1672If a line begins with <...>, then it's a check to see if the
1673directive on the line should be done or not. The stuff inside the <...>
1674can contain perl's && (and), || (or), ! (not), and parens for grouping,
1675along with "flags" that might be indicated by the user with
1676.BI -x flag
1677options.
1678
1679For example, using "-xfoo" will cause "foo" to be true inside the <...>
1680blocks. Therefore, a line beginning with "<foo>" would be done only when
1681"-xfoo" had been specified, while a line beginning with "<!foo>" would be
1682done only when "-xfoo" is not specified (of course, a line without any <...>
1683is done in either case).
1684
1685A realistic example might be
1686.nf
1687 <!v> option: -vv
1688.fi
1689This will cause -vv messages to be the default, but allow "-xv" to override.
1690
1691There are a few flags that are set automatically:
1692.RS
1693.TP
1694.B TTY
1695true if the output is to the screen (as opposed to being redirected to a file).
1696You can force this (as with all the other automatic flags) with -xTTY.
1697.TP
1698.B -v
1699True if -v was specified. If -vv was specified, both
1700.B -v
1701and
1702.B -vv
1703flags are true (and so on).
1704.TP
1705.B -nice
1706True if -nice was specified. Same thing about -nnice as for -vv.
1707.PP
1708.TP
1709.B -list
1710true if -list (or -l) was given.
1711.TP
1712.B -dir
1713true if -dir was given.
1714.RE
1715
1716Using this info, you might change the last example to
1717.nf
1718
1719 <!v && !-v> option: -vv
1720
1721.fi
1722The added "&& !-v" means "and if the '-v' option not given".
1723This will allow you to use "-v" alone on the command line, and not
1724have this directive add the more verbose "-vv" automatically.
1725
1726.RS 0
1727Some other examples:
1728.TP
1729<!-dir && !here> option: -dir ~/
1730Effectively make the default directory your home directory (instead of the
1731current directory). Using -dir or -xhere will undo this.
1732.TP
1733<tex> option: -name .tex -dir ~/pub
1734Create '-xtex' to search only "*.tex" files in your ~/pub directory tree.
1735Actually, this could be made a bit better. If you combine '-xtex' and '-dir'
1736on the command line, this directive will add ~/pub to the list, when you
1737probably want to use the -dir directory only. You could do
1738.nf
1739
1740 <tex> option: -name .tex
1741 <tex && !-dir> option: -dir ~/pub
1742.fi
1743
1744to allow '-xtex' to work as before, but allow a command-line "-dir"
1745to take precedence with respect to ~/pub.
1746.TP
1747<fluff> option: -nnice -sort -i -vvv
1748Combine a few user-friendly options into one '-xfluff' option.
1749.TP
1750<man> option: -ddir /usr/man -v -w
1751When the '-xman' option is given, search "/usr/man" for whole-words
1752(of whatever regex or regexes are given on the command line), with -v.
1753.RE
1754
1755The lines in the startup file are executed from top to bottom, so something
1756like
1757.nf
1758
1759 <both> option: -xflag1 -xflag2
1760 <flag1> option: ...whatever...
1761 <flag2> option: ...whatever...
1762
1763.fi
1764will allow '-xboth' to be the same as '-xflag1 -xflag2' (or '-xflag1,flag2'
1765for that matter). However, if you put the "<both>" line below the others,
1766they will not be true when encountered, so the result would be different
1767(and probably undesired).
1768
1769The "magic" directives are used to determine if a file looks to be binary
1770or not. The form of a magic line is
1771.nf
1772 magic: \fISIZE\fP : \fIPERLCODE\fP
1773.fi
1774where
1775.I SIZE
1776is the number of bytes of the file you need to check, and
1777.I PERLCODE
1778is the code to do the check. Within
1779.IR PERLCODE ,
1780the variable $H will hold at least the first
1781.I SIZE
1782bytes of the file (unless the file is shorter than that, of course).
1783It might hold more bytes. The perl should evaluate to true if the file
1784should be considered a binary.
1785
1786An example might be
1787.nf
1788 magic: 6 : substr($H, 0, 6) eq 'GIF87a'
1789.fi
1790to test for a GIF ("-iskip .gif" is better, but this might be useful
1791if you have images in files without the ".gif" extension).
1792
1793Since the startup file is checked from top to bottom, you can be a bit
1794efficient:
1795.nf
1796 magic: 6 : ($x6 = substr($H, 0, 6)) eq 'GIF87a'
1797 magic: 6 : $x6 eq 'GIF89a'
1798.fi
1799You could also write the same thing as
1800.nf
1801 magic: 6 : (($x6 = substr($H, 0, 6)) eq 'GIF87a') || ## an old gif, or.. \e
1802 $x6 eq 'GIF89a' ## .. a new one.
1803.fi
1804since newlines may be escaped.
1805
1806The default internal startup file includes
1807.nf
1808 magic: 32 : $H =~ m/[\ex00-\ex06\ex10-\ex1a\ex1c-\ex1f\ex80\exff]{2}/
1809.fi
1810which checks for certain non-printable characters, and catches a large
1811number of binary files, including most systems' executables, linkable
1812objects, compressed, tarred, and otherwise folded, spindled, and mutilated
1813files.
1814
1815Another example might be
1816.nf
1817 ## an archive library
1818 magic: 17 : substr($H, 0, 17) eq "!<arch>\en__.SYMDEF"
1819.fi
1820
1821.SH "RETURN VALUE"
1822.I Search
1823returns zero if lines (or files, if appropriate) were found,
1824or if no work was requested (such as with
1825.BR -help ).
1826Returns 1 if no lines (or files) were found.
1827Returns 2 on error.
1828
1829.SH TODO
1830Things I'd like to add some day:
1831.nf
1832 + show surrounding lines (context).
1833 + highlight matched portions of lines.
1834 + add '-and', which can go between regexes to override
1835 the default logical or of the regexes.
1836 + add something like
1837 -below GLOB
1838 which will examine a tree and only consider files that
1839 lie in a directory deeper than one named by the pattern.
1840 + add 'warning' and 'error' directives.
1841 + add 'help' directive.
1842.fi
1843.SH BUGS
1844If -xdev and multiple -dir arguments are given, any file in any of the
1845target filesystems is allowed. It would be better to allow each filesystem
1846for each separate tree.
1847
1848Multiple -dir args might also cause some confusing effects. Doing
1849.nf
1850 -dir some/dir -dir other
1851.fi
1852will search "some/dir" completely, then search "other" completely. This
1853is good. However, something like
1854.nf
1855 -dir some/dir -dir some/dir/more/specific
1856.fi
1857will search "some/dir" completely *except for* "some/dir/more/specific",
1858after which it will return and be searched. Not really a bug, but just sort
1859of odd.
1860
1861File times (for -newer, etc.) of symbolic links are for the file, not the
1862link. This could cause some misunderstandings.
1863
1864Probably more. Please let me know.
1865.SH AUTHOR
1866Jeffrey Friedl, Omron Corp (jfriedl@omron.co.jp)
1867.br
1868http://www.wg.omron.co.jp/cgi-bin/j-e/jfriedl.html
1869
1870.SH "LATEST SOURCE"
1871See http://www.wg.omron.co.jp/~jfriedl/perl/index.html