Commit | Line | Data |
---|---|---|
d444a431 TB |
1 | #!/usr/local/bin/perl -w |
2 | 'di'; | |
3 | 'ig00'; | |
4 | ############################################################################## | |
5 | ## | |
6 | ## search | |
7 | ## | |
8 | ## Jeffrey Friedl (jfriedl@omron.co.jp), Dec 1994. | |
9 | ## Copyright 19.... ah hell, just take it. | |
10 | ## | |
11 | ## BLURB: | |
12 | ## A combo of find and grep -- more or less do a 'grep' on a whole | |
13 | ## directory tree. Fast, with lots of options. Much more powerful than | |
14 | ## the simple "find ... | xargs grep ....". Has a full man page. | |
15 | ## Powerfully customizable. | |
16 | ## | |
17 | ## This file is big, but mostly comments and man page. | |
18 | ## | |
19 | ## See man page for usage info. | |
20 | ## Return value: 2=error, 1=nothing found, 0=something found. | |
21 | ## | |
22 | ||
23 | $version = "950918.5"; | |
24 | ## | |
25 | ## "950918.5"; | |
26 | ## Changed all 'sysread' to 'read' because Linux perl's don't seem | |
27 | ## to like sysread() | |
28 | ## | |
29 | ## "941227.4"; | |
30 | ## Added -n, -u | |
31 | ## | |
32 | ## "941222.3" | |
33 | ## Added -nice (due to Lionel Cons <Lionel.Cons@cern.ch>) | |
34 | ## Removed any leading "./" from name. | |
35 | ## Added default flags for ~/.search, including TTY, -nice, -list, etc. | |
36 | ## Program name now has path removed when printed in diagnostics. | |
37 | ## Added simple tilde-expansion to -dir arg. | |
38 | ## Added -dskip, etc. Fixed -iregex bug. | |
39 | ## Changed -dir to be additive, adding -ddir. | |
40 | ## Now screen out devices, pipes, and sockets. | |
41 | ## More tidying and lots of expanding of the man page | |
42 | ## | |
43 | ## | |
44 | ## "941217.2"; | |
45 | ## initial release. | |
46 | ||
$stripped=0;

&init;

## The startup file lives in the user's home directory, when there is one.
$rc_file = exists $ENV{'HOME'} ? join('/', $ENV{'HOME'}, ".search") : "";

&check_args;

## A regex is mandatory unless -find or -showrc was given.
if ($FIND_ONLY == 0 && $showrc == 0 && @ARGV == 0) {
    $! = 2;
    die "expecting regex arguments.\n";
}

&prepare_to_search($rc_file);

## Build &dodir from the DATA section unless it is already compiled in.
## This inlining is the BIG key to speed.
&import_program unless defined &dodir;

## Keep searching for as long as directories remain queued.
while (@todo) {
    &dodir(shift(@todo));
}

&clear_message if $VERBOSE && $STDERR_IS_TTY;
exit($retval);
###############################################################################
73 | ############################################################################### | |
74 | ||
sub init
{
    ##
    ## Give every option variable its default value.  Takes no arguments;
    ## all results are left in package globals.  Also notes whether
    ## STDOUT/STDERR are terminals and trims any path from $0.
    ##

    ## initialize variables that might be reset by command-line args
    $DOREP=0;            ## set true by -dorep (redo multi-hardlink files)
    $DOREP=1 if $^O eq 'MSWin32';
    $DO_SORT=0;          ## set by -sort (sort files in a dir before checking)
    $FIND_ONLY=0;        ## set by -find (don't search files)
    $LIST_ONLY=0;        ## set true by -l (list filenames only)
    $NEWER=0;            ## set by -newer, "-mtime -###"
    $NICE=0;             ## set by -nice (print human-readable output)
    $NOLINKS=0;          ## set true by -nolinks (don't follow symlinks)
    $OLDER=0;            ## set by -older, "-mtime ###"
    $PREPEND_FILENAME=1; ## set false by -h (don't prefix lines with filename)
    $REPORT_LINENUM=0;   ## set true by -n (show line numbers)
    $VERBOSE=0;          ## set to a value by -v, -vv, etc. (verbose messages)
    $WHY=0;              ## set true by -why, -vvv+ (report why skipped)
    $XDEV=0;             ## set true by -xdev (stay on one filesystem)
    $all=0;              ## set true by -all (don't skip many kinds of files)
    $iflag = '';         ## set to 'i' by -i (ignore case);
    $norc=0;             ## set by -norc (don't load rc file)
    $showrc=0;           ## set by -showrc (show what happens with rc file)
    $underlineOK=0;      ## set true by -u (watch for underline stuff)
    $words=0;            ## set true by -w (match whole-words only)
    $DELAY=0;            ## inter-file delay (seconds)
    $retval=1;           ## will set to 0 if we find anything.

    ## Positions of the fields we use within the list returned by stat().
    ## The list is zero-based: device is element 0 and inode is element 1.
    ## (These used to be 1 and 2, which really selected inode and mode.)
    ## NOTE(review): the -xdev test in &dodir keys on this device number;
    ## confirm that its "defined $xdev{$dev}" test has the intended sense.
    $STAT_DEV   = 0;
    $STAT_INODE = 1;
    $STAT_MTIME = 9;

    $VV_PRINT_COUNT = 50;     ## with -vv, print every VV_PRINT_COUNT files, or...
    $VV_SIZE = 1024*1024;     ## ...every VV_SIZE bytes searched
    $vv_print = $vv_size = 0; ## running totals.

    ## set default options, in case the rc file wants them
    $opt{'TTY'}= 1 if -t STDOUT;

    ## want to know this for debugging message stuff
    $STDERR_IS_TTY = -t STDERR ? 1 : 0;
    $STDERR_SCREWS_STDOUT = ($STDERR_IS_TTY && -t STDOUT) ? 1 : 0;

    $0 =~ s,.*/,,;  ## clean up $0 for any diagnostics we'll be printing.
}
119 | ||
##
## Check arguments.
##
## Shifts leading option arguments (anything starting with '-') off @ARGV
## and records them in the option globals.  Stops at the first non-option
## argument or after '-e'.  Takes no parameters; it works on whatever @ARGV
## is current, so &read_rc can call it with a localized @ARGV.  Dies with
## $! set to 2 on an unknown option or a missing/invalid option value.
## -version and -help print and exit(0); -strip calls &strip, which exits.
##
sub check_args
{
    while (@ARGV && $ARGV[0] =~ m/^-/)
    {
        $arg = shift(@ARGV);

        if ($arg eq '-version' || ($VERBOSE && $arg eq '-help')) {
            print qq/Jeffrey's file search, version "$version".\n/;
            exit(0) unless $arg eq '-help';
        }
        if ($arg eq '-help') {
            print <<INLINE_LITERAL_TEXT;
usage: $0 [options] [-e] [PerlRegex ....]
OPTIONS TELLING *WHERE* TO SEARCH:
-dir DIR start search at the named directory (default is current dir).
-xdev stay on starting file system.
-sort sort the files in each directory before processing.
-nolinks don't follow symbolic links.
OPTIONS TELLING WHICH FILES TO EVEN CONSIDER:
-mtime # consider files modified > # days ago (-# for < # days old)
-newer FILE consider files modified more recently than FILE (also -older)
-name GLOB consider files whose name matches pattern (also -regex).
-skip GLOB opposite of -name: identifies files to not consider.
-path GLOB like -name, but for files whose whole path is described.
-dpath/-dregex/-dskip versions for selecting or pruning directories.
-all don't skip any files marked to be skipped by the startup file.
-x<SPECIAL> (see manual, and/or try -showrc).
-why report why a file isn't checked (also implied by -vvvv).
OPTIONS TELLING WHAT TO DO WITH FILES THAT WILL BE CONSIDERED:
-f | -find just list files (PerlRegex ignored). Default is to grep them.
-ff | -ffind Does a faster -find (implies -find -all -dorep)
OPTIONS CONTROLLING HOW THE SEARCH IS DONE (AND WHAT IS PRINTED):
-l | -list only list files with matches, not the lines themselves.
-nice | -nnice print more "human readable" output.
-n prefix each output line with its line number in the file.
-h don't prefix output lines with file name.
-u also look "inside" manpage-style underlined text
-i do case-insensitive searching.
-w match words only (as defined by perl's \\b).
OTHER OPTIONS:
-v, -vv, -vvv various levels of message verbosity.
-e end of options (in case a regex looks like an option).
-showrc show what the rc file sets, then exit.
-norc don't load the rc file.
-dorep check files with multiple hard links multiple times.
INLINE_LITERAL_TEXT
            print "Use -v -help for more verbose help.\n" unless $VERBOSE;
            print "This script file is also a man page.\n" unless $stripped;
            print <<INLINE_LITERAL_TEXT if $VERBOSE;

If -f (or -find) given, PerlRegex is optional and ignored.
Otherwise, will search for files with lines matching any of the given regexes.

Combining things like -name and -mtime implies boolean AND.
However, duplicating things (such as -name '*.c' -name '*.txt') implies OR.

-mtime may be given floating point (i.e. 1.5 is a day and a half).
-iskip/-idskip/-ipath/... etc are case-insensitive versions.

If any letter in -newer/-older is upper case, "or equal" is
inserted into the test.

You can always find the latest version on the World Wide Web in
http://www.wg.omron.co.jp/~jfriedl/perl/
INLINE_LITERAL_TEXT
            exit(0);
        }
        $DOREP=1,             next if $arg eq '-dorep';   ## do repeats
        $DO_SORT=1,           next if $arg eq '-sort';    ## sort files
        $NOLINKS=1,           next if $arg eq '-nolinks'; ## no sym. links
        $PREPEND_FILENAME=0,  next if $arg eq '-h';       ## no filename prefix
        $REPORT_LINENUM=1,    next if $arg eq '-n';       ## show line numbers
        $WHY=1,               next if $arg eq '-why';     ## tell why skipped
        $XDEV=1,              next if $arg eq '-xdev';    ## don't leave F.S.
        $all=1,$opt{'-all'}=1,next if $arg eq '-all';     ## don't skip *.Z, etc
        $iflag='i',           next if $arg eq '-i';       ## ignore case
        $norc=1,              next if $arg eq '-norc';    ## don't load rc file
        $showrc=1,            next if $arg eq '-showrc';  ## show rc file
        $underlineOK=1,       next if $arg eq '-u';       ## look through underlining
        $words=1,             next if $arg eq '-w';       ## match "words" only
        &strip                     if $arg eq '-strip';   ## dump this program
        last                       if $arg eq '-e';

        ## Anchored, so that only an argument that is exactly -delayNNN
        ## counts (an unanchored test also caught things like -xnodelay5).
        $DELAY=$1,            next if $arg =~ m/^-delay(\d+)$/;

        $FIND_ONLY=1,         next if $arg =~/^-f(ind)?$/;## do "find" only

        $FIND_ONLY=1, $DOREP=1, $all=1,
                              next if $arg =~/^-ff(ind)?$/;## fast -find
        $LIST_ONLY=1,$opt{'-list'}=1,
                              next if $arg =~/^-l(ist)?$/;## only list files

        if ($arg =~ m/^-(v+)$/) { ## verbosity
            $VERBOSE =length($1);
            foreach $len (1..$VERBOSE) { $opt{'-'.('v' x $len)}=1 }
            next;
        }
        if ($arg =~ m/^-(n+)ice$/) { ## "nice" output
            $NICE =length($1);
            foreach $len (1..$NICE) { $opt{'-'.('n' x $len).'ice'}=1 }
            next;
        }

        ## -skip, -iskip, -dskip, -idskip: whitespace-separated globs.
        if ($arg =~ m/^-(i?)(d?)skip$/) {
            local($i) = $1 eq 'i';
            local($d) = $2 eq 'd';
            ## $arg already carries its leading dash.
            $! = 2, die qq/$0: expecting glob arg to $arg\n/ unless @ARGV;
            foreach (split(/\s+/, shift @ARGV)) {
                if ($d) {
                    $idskip{$_}=1 if $i;
                    $dskip{$_}=1;
                } else {
                    $iskip{$_}=1 if $i;
                    $skip{$_}=1;
                }
            }
            next;
        }

        ## -name/-path/-regex and their i- and d- variants.  The option
        ## itself is stored as part of the key so &prepare_to_search can
        ## tell the kinds apart later.
        if ($arg =~ m/^-(i?)(d?)(regex|path|name)$/) {
            local($i) = $1 eq 'i';
            $! = 2, die qq/$0: expecting arg to $arg\n/ unless @ARGV;
            foreach (split(/\s+/, shift @ARGV)) {
                $iname{join(',', $arg, $_)}=1 if $i;
                $name{join(',', $arg, $_)}=1;
            }
            next;
        }

        ## -dir adds a starting directory; -ddir discards earlier ones first.
        if ($arg =~ m/^-d?dir$/) {
            $opt{'-dir'}=1;
            $! = 2, die qq/$0: expecting filename arg to $arg\n/ unless @ARGV;
            $start = shift(@ARGV);
            $start =~ s#^~(/+|$)#$ENV{'HOME'}$1# if defined $ENV{'HOME'};
            $! = 2, die qq/$0: can't find ${arg}'s "$start"\n/ unless -e $start;
            $! = 2, die qq/$0: ${arg}'s "$start" not a directory.\n/ unless -d _;
            undef(@todo), $opt{'-ddir'}=1 if $arg eq '-ddir';
            push(@todo, $start);
            next;
        }

        ## -newer/-older FILE.  Any upper-case letter in the option name
        ## turns the test into "or equal".
        if ($arg =~ m/^-(new|old)er$/i) {
            $! = 2, die "$0: expecting filename arg to $arg\n" unless @ARGV;
            local($file, $time) = shift(@ARGV);
            $! = 2, die qq/$0: can't stat ${arg}'s "$file".\n/
                     unless $time = (stat($file))[$STAT_MTIME];
            local($upper) = $arg =~ tr/A-Z//;
            if ($arg =~ m/new/i) {
               $time++ unless $upper;
               $NEWER = $time if $NEWER < $time;
            } else {
               $time-- unless $upper;
               $OLDER = $time if $OLDER == 0 || $OLDER > $time;
            }
            next;
        }

        ## -mtime DAYS.  Compared exactly, so that something like
        ## -xno-mtime falls through to the -x handling below.
        if ($arg eq '-mtime') {
            $! = 2, die "$0: expecting numerical arg to $arg\n" unless @ARGV;
            local($days) = shift(@ARGV);
            $! = 2, die qq/$0: inappropriate arg ($days) to $arg\n/ if $days==0;
            $days *= 3600 * 24;
            if ($days < 0) {
                local($time) = $^T + $days;
                $NEWER = $time if $NEWER < $time;
            } else {
                local($time) = $^T - $days;
                $OLDER = $time if $OLDER == 0 || $OLDER > $time;
            }
            next;
        }

        ## special user options
        if ($arg =~ m/^-x(.+)/) {
            foreach (split(/[\s,]+/, $1)) {  $user_opt{$_} = $opt{$_}= 1;  }
            next;
        }

        $! = 2, die "$0: unknown arg [$arg]\n";
    }
}
304 | ||
##
## Turn a filename glob into the text of an anchored regex and return it.
## A glob containing no real wildcard (no * ? or [..]) is treated as if
## it had a leading '*', i.e. it matches as a suffix.
##
sub glob_to_regex
{
    local($glob) = @_;
    local($wild) = 0;     ## becomes true once a real wildcard is seen
    local($body) = '';    ## regex text accumulated so far
    local($piece);

    ## Tokens are: an escaped char, a lone * or ?, a [..] class, or a run
    ## of ordinary characters.
    foreach $piece ($glob =~ m/\\.|[*?]|\[]?[^]]*]|[^[\\*?]+/g) {
        if ($piece eq '*' || $piece eq '?') {
            $body .= ".$piece";          ## glob * and ? become .* and .?
            $wild = 1;
        } elsif (substr($piece, 0, 1) eq '[') {
            $body .= $piece;             ## a [..] class is passed through
            $wild = 1;
        } else {
            local($literal) = $piece;
            $literal =~ s/^\\//;         ## drop any leading backslash,
            $literal =~ s/(\W)/\\$1/g;   ## then quote anything dangerous.
            $body .= $literal;
        }
    }

    return ($wild ? '^' : '^.*') . $body . '$';
}
329 | ||
##
## Turn the parsed options into the pieces &dodir needs.
##
## Argument: the rc file name, handed to &read_rc unless -norc was given.
## Exits 0 after reading the rc file if -showrc was given.
##
## Leaves in globals: $NEXT_DIR_ENTRY; the test source strings $SKIP_TEST,
## $DSKIP_TEST, $GLOB_TESTS, $DGLOB_TESTS, $MAGIC_TESTS and $REGEX_TEST
## with their $DO_... flags; and $CAN_USE_FAST_LISTONLY.  @todo defaults
## to '.', and each starting directory is recorded in %dir_done (and in
## %xdev when -xdev is in effect).
##
## The regexes to search for are taken from (and removed from) @ARGV.
## Dies with $! set to 2 if the time constraints are contradictory or a
## starting directory cannot be stat()ed.
##
sub prepare_to_search
{
    local($rc_file) = @_;

    $HEADER_BYTES=0;          ## Might be set nonzero in &read_rc;
    $last_message_length = 0; ## For &message and &clear_message.

    &read_rc($rc_file, $showrc) unless $norc;
    exit(0) if $showrc;

    $NEXT_DIR_ENTRY = $DO_SORT ? 'shift @files' : 'readdir(DIR)';
    $WHY = 1 if $VERBOSE > 3; ## Arg -vvvv or above implies -why.
    @todo = ('.') if @todo == 0; ## Where we'll start looking

    ## see if any user options were specified that weren't accounted for
    foreach $opt (keys %user_opt) {
        next if defined $seen_opt{$opt};
        warn "warning: -x$opt never considered.\n";
    }

    ## An error like any other, so exit with status 2.
    $! = 2, die "$0: multiple time constraints exclude all possible files.\n"
        if ($NEWER && $OLDER) && ($NEWER > $OLDER);

    ##
    ## Process any -skip/-iskip args that had been given
    ##
    local(@skip_test);
    foreach $glob (keys %skip) {
        $i = defined($iskip{$glob}) ? 'i': '';
        push(@skip_test, '$name =~ m/'. &glob_to_regex($glob). "/$i");
    }
    if (@skip_test) {
        $SKIP_TEST = join('||',@skip_test);
        $DO_SKIP_TEST = 1;
    } else {
        $DO_SKIP_TEST = $SKIP_TEST = 0;
    }

    ##
    ## Process any -dskip/-idskip args that had been given
    ##
    local(@dskip_test);
    foreach $glob (keys %dskip) {
        $i = defined($idskip{$glob}) ? 'i': '';
        push(@dskip_test, '$name =~ m/'. &glob_to_regex($glob). "/$i");
    }
    if (@dskip_test) {
        $DSKIP_TEST = join('||',@dskip_test);
        $DO_DSKIP_TEST = 1;
    } else {
        $DO_DSKIP_TEST = $DSKIP_TEST = 0;
    }


    ##
    ## Process any -name, -path, -regex, etc. args that had been given.
    ## Each key of %name is "OPTION,PATTERN" as stored by &check_args.
    ##
    undef @name_test;
    undef @dname_test;
    foreach $key (keys %name) {
        local($type, $pat) = split(/,/, $key, 2);
        local($i) = defined($iname{$key}) ? 'i' : '';
        if ($type =~ /regex/) {
            $pat =~ s/!/\\!/g;
            $test = "\$name =~ m!^$pat\$!$i";
        } else {
            local($var) = $type eq 'name' ? '$name' : '$file';
            $test = "$var =~ m/". &glob_to_regex($pat). "/$i";
        }
        if ($type =~ m/^-i?d/) {
            push(@dname_test, $test);
        } else {
            push(@name_test, $test);
        }
    }
    if (@name_test) {
        $GLOB_TESTS = join('||', @name_test);

        $DO_GLOB_TESTS = 1;
    } else {
        $GLOB_TESTS = $DO_GLOB_TESTS = 0;
    }
    if (@dname_test) {
        $DGLOB_TESTS = join('||', @dname_test);
        $DO_DGLOB_TESTS = 1;
    } else {
        $DGLOB_TESTS = $DO_DGLOB_TESTS = 0;
    }


    ##
    ## Process any 'magic' things from the startup file.
    ##
    if (@magic_tests && $HEADER_BYTES) {
        ## the $magic' one is for when &dodir is not inlined
        $tests = join('||',@magic_tests);
        $MAGIC_TESTS = " { package magic; \$val = ($tests) }";
        $DO_MAGIC_TESTS = 1;
    } else {
        $MAGIC_TESTS = 1;
        $DO_MAGIC_TESTS = 0;
    }

    ##
    ## Prepare regular expressions.
    ##
    {
        local(@regex_tests);

        ## Always defined, so it interpolates cleanly below even when -l
        ## was not given.
        $mflag = '';
        if ($LIST_ONLY) {
            ## need to have $* set, but perl5 just won't shut up about it.
            if ($] >= 5) {
                $mflag = 'm';
            } else {
                eval ' $* = 1 ';
            }
        }

        ##
        ## Until I figure out a better way to deal with it,
        ## We have to worry about a regex like [^xyz] when doing $LIST_ONLY.
        ## Such a regex *will* match \n, and if I'm pulling in multiple
        ## lines, it can allow lines to match that would otherwise not match.
        ##
        ## Therefore, if there is a '[^' in a regex, we can NOT take a chance
        ## and use the fast listonly.
        ##
        $CAN_USE_FAST_LISTONLY = $LIST_ONLY;

        local(@extra);
        ## Optional "underscore, backspace" between letters.  Non-capturing
        ## on perl5, so that it doesn't renumber the user's own groups.
        local($underline_glue) = ($] >= 5) ? '(?:_\cH)?' : '(_\cH)?';
        while (@ARGV) {
            $regex = shift(@ARGV);
            ##
            ## If watching for underlined things too, add another regex.
            ##
            if ($underlineOK) {
                if ($regex =~ m/[?*+{}()\\.|^\$[]/) {
                    warn "$0: warning, can't underline-safe ``$regex''.\n";
                } else {
                    $regex = join($underline_glue, split(//, $regex));
                }
            }

            ## If nothing special in the regex, just use index...
            ## is quite a bit faster.
            if (($iflag eq '') && ($words == 0) &&
                        $regex !~ m/[?*+{}()\\.|^\$[]/)
            {
                push(@regex_tests, "(index(\$_, q+$regex+)>=0)");

            } else {
                ## Escape every $x, @x and /x, so that none of them is
                ## interpolated or ends the m/.../ once this is eval'ed.
                $regex =~ s#[\$\@\/]\w#\\$&#g;
                if ($words) {
                    if ($regex =~ m/\|/) {
                        ## could be dangerous -- see if we can wrap in parens.
                        if ($regex =~ m/\\\d/) {
                            warn "warning: -w and a | in a regex is dangerous.\n"
                        } else {
                            ## join() with $regex as the separator yields
                            ## "(" . $regex . ")".
                            $regex = join($regex, '(', ')');
                        }
                    }
                    $regex = join($regex, '\b', '\b');
                }
                ## index(), not substr(): we want to know whether '[^'
                ## occurs anywhere in the regex.
                $CAN_USE_FAST_LISTONLY = 0 if index($regex, "[^") >= 0;
                push(@regex_tests, "m/$regex/$iflag$mflag");
            }

            ## If we're done, but still have @extra to do, get set for that.
            if (@ARGV == 0 && @extra) {
                @ARGV = @extra;   ## now deal with the extra stuff.
                $underlineOK = 0; ## but no more of this.
                undef @extra;     ## or this.
            }
        }
        if (@regex_tests) {
            $REGEX_TEST = join('||', @regex_tests);
            ## print STDERR $REGEX_TEST, "\n"; exit;
        } else {
            ## must be doing -find -- just give something syntactically correct.
            $REGEX_TEST = 1;
        }
    }

    ##
    ## Make sure we can read the first item(s).
    ##
    foreach $start (@todo) {
        $! = 2, die qq/$0: can't stat "$start"\n/
            unless ($dev,$inode) = (stat($start))[$STAT_DEV,$STAT_INODE];

        if (defined $dir_done{"$dev,$inode"}) {
            ## ignore the repeat.
            warn(qq/ignoring "$start" (same as "$dir_done{"$dev,$inode"}").\n/)
                if $VERBOSE;
            next;
        }

        ## if -xdev was given, remember the device.
        $xdev{$dev} = 1 if $XDEV;

        ## Note that we won't want to do it again
        $dir_done{"$dev,$inode"} = $start;
    }
}
536 | ||
537 | ||
##
## See the comment above the __END__ above the 'sub dodir' below.
##

## Report a variable that could not be inlined, and give up.
sub bad {
    print STDERR "$0: internal error (@_)\n";
    exit 2;
}

##
## Pull the source of &dodir out of the DATA section, substitute the
## current values of the all-upper-case option variables into it, and
## eval the result so that &dodir gets defined.
##
sub import_program
{
    ## Everything up to the next __END__ is the text of &dodir.
    local($/) = "\n__END__";
    $prog = <DATA>;
    close(DATA);

    ## Any 'eval' in that text goes away.
    $prog =~ s/\beval\b//g;

    ## Each upper-case $-variable is replaced by its current value;
    ## an undefined one is an internal error.
    if ($] < 5) {
        $prog =~ s/\$([A-Z][A-Z0-9_]{2,}\b)/local(*VAR) = $_main{$1};
                         &bad($1) if !defined $VAR; $VAR;/eg;
    } else {
        $prog =~ s/\$([A-Z][A-Z0-9_]{2,}\b)/
                    defined(${$main::{$1}}) ? ${$main::{$1}} : &bad($1)/eg;
    }

    ## Compiling the text is what defines &dodir.
    eval $prog;
    if ($@) {
        $! = 2;
        die "$0 internal error: $@\n";
    }
}
567 | ||
568 | ########################################################################### | |
569 | ||
570 | ## | |
571 | ## Read the .search file: | |
572 | ## Blank lines and lines that are only #-comments ignored. | |
573 | ## Newlines may be escaped to create long lines | |
574 | ## Other lines are directives. | |
575 | ## | |
576 | ## A directive may begin with an optional tag in the form <...> | |
577 | ## Things inside the <...> are evaluated as with: | |
578 | ## <(this || that) && must> | |
579 | ## will be true if | |
580 | ## -xmust -xthis or -xmust -xthat | |
581 | ## were specified on the command line (order doesn't matter, though) | |
582 | ## A directive is not done if there is a tag and it's false. | |
583 | ## Any characters but whitespace and &|()>,! may appear after an -x | |
584 | ## (although "-xdev" is special). -xmust,this is the same as -xmust -xthis. | |
585 | ## Something like -x~ would make <~> true, and <!~> false. | |
586 | ## | |
587 | ## Directives are in the form: | |
588 | ## option: STRING | |
589 | ## magic : NUMBYTES : EXPR | |
590 | ## | |
591 | ## With option: | |
592 | ## The STRING is parsed like a Bourne shell command line, and the | |
593 | ## options are used as if given on the command line. | |
594 | ## No comments are allowed on 'option' lines. | |
595 | ## Examples: | |
596 | ## # skip objects and libraries | |
597 | ## option: -skip '.o .a' | |
598 | ## # skip emacs *~ and *# files, unless -x~ given: | |
599 | ## <!~> option: -skip '~ #' | |
600 | ## | |
601 | ## With magic: | |
602 | ## EXPR can be pretty much any perl (comments allowed!). | |
603 | ## If it evaluates to true for any particular file, it is skipped. | |
604 | ## The only info you'll have about a file is the variable $H, which | |
605 | ## will have at least the first NUMBYTES of the file (less if the file | |
606 | ## is shorter than that, of course, and maybe more). You'll also have | |
607 | ## any variables you set in previous 'magic' lines. | |
608 | ## Examples: | |
609 | ## magic: 6 : ($x6 = substr($H, 0, 6)) eq 'GIF87a' | |
610 | ## magic: 6 : $x6 eq 'GIF89a' | |
611 | ## | |
612 | ## magic: 6 : (($x6 = substr($H, 0, 6)) eq 'GIF87a' ## old gif \ | |
613 | ## || $x6 eq 'GIF89a' ## new gif | |
614 | ## (the above two sets are the same) | |
615 | ## ## Check the first 32 bytes for "binarish" looking bytes. | |
616 | ## ## Don't blindly dump on any high-bit set, as non-ASCII text | |
617 | ## ## often has them set. \x80 and \xff seem to be special, though. | |
618 | ## ## Require two in a row to not get things like perl's $^T. | |
619 | ## ## This is known to get *.Z, *.gz, pkzip, *.elc and about any | |
620 | ## ## executable you'll find. | |
621 | ## magic: 32 : $H =~ m/[\x00-\x06\x10-\x1a\x1c-\x1f\x80\xff]{2}/ | |
622 | ## | |
##
## Make an eval error pretty: drop the "(eval)" location noise and keep
## only the first line.  Returns the cleaned message, newline-terminated.
##
sub clean_eval_error {
    local($_) = @_;
    s/ in file \(eval\) at line \d+,//g; ## perl4-style error
    s/ at \(eval \d+\) line \d+,//g;     ## perl5-style error
    $_ = $` if m/\n/;                    ## remove all but first line
    "$_\n";
}

##
## Read and act on the startup file.
##
## Arguments:
##   $file -- name of the rc file; if it can't be opened, a built-in
##            default is used in its place.
##   $show -- true for -showrc: print every directive, together with the
##            reason it is or is not in effect.
##
## 'option' directives are split up like a Bourne shell line and handed to
## &check_args via a localized @ARGV.  'magic' directives are test-compiled
## and appended to @magic_tests, raising $HEADER_BYTES as needed.  Every
## name used in a <...> tag is recorded in %seen_opt.
## Dies with $! set to 2 on any error in the file.
##
sub read_rc
{
    local($file, $show) = @_;
    local($line_num, $ln, $tag) = 0;
    local($use_default, @default) = 0;
    local($more);

    { package magic; $^W= 0; } ## turn off warnings for when we run EXPR's

    ## Explicit read mode, so the name can only ever be opened for input.
    unless (open(RC, "< $file")) {
        $use_default=1;
        $file = "<internal default startup file>";
        ## no RC file -- use this default.
        @default = split(/\n/,<<'--------INLINE_LITERAL_TEXT');
magic: 32 : $H =~ m/[\x00-\x06\x10-\x1a\x1c-\x1f\x80\xff]{2}/
option: -skip '.a .elc .gz .o .pbm .xbm .dvi'
option: -iskip '.com .exe .lib .pdb .tarz .zip .z .lzh .jpg .jpeg .gif .uu'
<!~> option: -skip '~ #'
--------INLINE_LITERAL_TEXT
    }

    print "reading RC file: $file\n" if $show;

    while (defined($_ = ($use_default ? shift(@default) : <RC>))) {
        $ln = ++$line_num;                    ## note starting line num.

        ## Allow continuations: a trailing backslash joins the next line,
        ## taken from the same place as the line it continues.
        while (s/\\\n?$/\n/) {
            $more = $use_default ? shift(@default) : <RC>;
            last unless defined $more;
            $_ .= $more;
            $line_num++;
        }

        next if /^\s*(#.*)?$/;          ## skip blank or comment-only lines.
        $do = '';

        ## look for an initial <...> tag.
        if (s/^\s*<([^>]*)>//) {
            ## This simple s// will make the tag ready to eval.
            ($tag = $msg = $1) =~
                s/[^\s&|(!)]+/
                        $seen_opt{$&}=1; ## note seen option
                        "defined(\$opt{q>$&>})" ## (q>> is safe quoting here)
                /eg;

            ## see if the tag is true or not, abort this line if not.
            $dothis = (eval $tag);
            $!=2, die "$file $ln <$msg>: $_".&clean_eval_error($@) if $@;

            if ($show) {
                ## Show every option in the tag the way it would be typed.
                $msg =~ s/[^\s&|(!)]+/-x$&/g;
                $msg =~ s/\s*!\s*/ no /g;
                $msg =~ s/\s*&&\s*/ and /g;
                $msg =~ s/\s*\|\|\s*/ or /g;
                $msg =~ s/^\s+//; $msg =~ s/\s+$//;
                $do = $dothis ? "(doing because $msg)" :
                                "(do if $msg)";
            } elsif (!$dothis) {
                next;
            }
        }

        if (m/^\s*option\s*:\s*/) {
            next if $all && !$show; ## -all turns off these checks;
            local($_) = $';
            s/\n$//;
            local($orig) = $_;
            print " $do option: $_\n" if $show;
            local($0) = "$0 ($file)"; ## for any error message.
            local(@ARGV);
            local($this);
            ##
            ## Parse $_ as a Bourne shell line -- fill @ARGV
            ##
            while (length) {
                if (s/^\s+//) {
                    push(@ARGV, $this) if defined $this;
                    undef $this;
                    next;
                }
                $this = '' if !defined $this;
                $this .= $1 while s/^'([^']*)'// ||
                                  s/^"([^"]*)"// ||
                                  s/^([^'"\s\\]+)//||
                                  s/^(\\[\D\d])//;
                $! = 2, die "$file $ln: error parsing $orig at $_\n" if m/^\S/;
            }
            push(@ARGV, $this) if defined $this;
            &check_args;
            $! = 2, die qq/$file $ln: unused arg "@ARGV".\n/ if @ARGV;
            next;
        }

        if (m/^\s*magic\s*:\s*(\d+)\s*:\s*/) {
            next if $all && !$show; ## -all turns off these checks;
            local($bytes, $check) = ($1, $');

            if ($show) {
                $check =~ s/\n?$/\n/;
                print " $do contents: $check";
            }
            ## Check to make sure the thing at least compiles.
            eval "package magic; (\$H = '1'x \$main'bytes) && (\n$check\n)\n";
            $! = 2, die "$file $ln: ".&clean_eval_error($@) if $@;

            $HEADER_BYTES = $bytes if $bytes > $HEADER_BYTES;
            push(@magic_tests, "(\n$check\n)");
            next;
        }
        $! = 2, die "$file $ln: unknown command\n";
    }

    ## RC was never opened if the built-in default was used.
    close(RC) unless $use_default;
}
739 | ||
##
## Show a progress message on STDERR.  On a terminal the message overwrites
## the previous one in place (padded with blanks if it is shorter) and
## leaves the cursor at the start of the line; otherwise it is simply
## printed on a line of its own.
##
sub message
{
    if ($STDERR_IS_TTY) {
        local($text) = @_;
        $thislength = length($text);

        ## Blanks needed to wipe out the tail of a longer previous message.
        local($pad) = $last_message_length - $thislength;
        print STDERR $text, ($pad > 0 ? ' ' x $pad : ''), "\r";

        $last_message_length = $thislength;
    } else {
        print STDERR $_[0], "\n";
    }
}
755 | ||
##
## Blank out whatever &message last left on the terminal line, and reset
## the message length and the -vv running totals to zero.
##
sub clear_message
{
    if ($last_message_length) {
        print STDERR ' ' x $last_message_length, "\r";
    }
    $last_message_length = 0;
    $vv_size = 0;
    $vv_print = 0;
}
761 | ||
##
## Output a copy of this program with comments, extra whitespace, and
## the trailing man page removed. On an ultra slow machine, such a copy
## might load faster (but I can't tell any difference on my machine).
##
## Rereads the script through DATA from the top and exits when done.
## Literal (here-document) text is passed through untouched; the two
## lines right after the while() below must stay exactly as they are,
## since the range test also sees them when this file reads itself.
##
sub strip {
    seek(DATA, 0, 0) || die "$0: can't reset internal pointer.\n";
    while(<DATA>) {
        print, next if /INLINE_LITERAL_TEXT/.../INLINE_LITERAL_TEXT/;
        ## must mention INLINE_LITERAL_TEXT on this line!

        ## Remove cruft.  Done as separate steps so that a line gets all
        ## of them: comment, leading whitespace, trailing whitespace.
        s/\#\#.*//;
        s/^\s+//;
        s/\s+$//;

        last if $_ eq '.00;';      ## the man page starts here
        ## Blank lines and the two nroff-trick lines are of no use now.
        next if ($_ eq '') || ($_ eq "'di';") || ($_ eq "'ig00';");
        s/\$stripped=0;/\$stripped=1;/;
        s/\s\s+/ /;  ## squish multiple whitespaces down to one.
        print $_, "\n";
    }
    exit(0);
}
781 | ||
##
## Exists only so that -w sees a second mention of names that are
## otherwise used once.  Never executed; the leading "1 ||" means none
## of the rest would be evaluated even if it were called.
##
sub dummy {

    1 || &dummy || &dir_done || &bad || &message || &clear_message ||
      $NEXT_DIR_ENTRY || $DELAY || $VV_SIZE || $VV_PRINT_COUNT ||
      $STDERR_SCREWS_STDOUT ||
      @files || @files || $magic'H || $magic'H || $xdev{''};

}
792 | ||
793 | ## | |
794 | ## If the following __END__ is in place, what follows will be | |
795 | ## inlined when the program first starts up. Any $ variable name | |
796 | ## all in upper case, specifically, any string matching | |
797 | ## \$([A-Z][A-Z0-9_]{2,}\b) | |
798 | ## will have the true value for that variable inlined. Also, any 'eval' is | |
799 | ## removed | |
800 | ## | |
801 | ## The idea is that when the whole thing is then eval'ed to define &dodir, | |
802 | ## the perl optimizer will make all the decisions that are based upon | |
803 | ## command-line options (such as $VERBOSE), since they'll be inlined as | |
804 | ## constants | |
805 | ## | |
806 | ## Also, and here's the big win, the tests for matching the regex, and a | |
807 | ## few others, are all inlined. Should be blinding speed here. | |
808 | ## | |
809 | ## See the read from <DATA> above for where all this takes place. | |
810 | ## But all-in-all, you *want* the __END__ here. Comment it out only for | |
811 | ## debugging.... | |
812 | ## | |
813 | ||
814 | __END__ | |
815 | ||
816 | ## | |
817 | ## dodir(DIR): check all "appropriate" files in the directory DIR. | |
818 | ## Any subdirectories are shoved onto the front of the global @todo, so | |
819 | ## they'll be done later. Sets $retval to 0 once something is found. | |
820 | ## | |
821 | ## Be careful about adding any upper-case variables, as they are subject | |
822 | ## to being inlined. See comments above the __END__ above. | |
823 | ## | |
824 | sub dodir | |
825 | { | |
826 | local($dir) = @_; | |
827 | $dir =~ s,/+$,,; ## remove any trailing slash. | |
828 | unless (opendir(DIR, "$dir/.")) { | |
829 | &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT; | |
830 | warn qq($0: can't opendir "$dir/".\n); | |
831 | return; | |
832 | } | |
833 | ||
834 | if ($VERBOSE) { | |
835 | &message($dir); | |
836 | $vv_print = $vv_size = 0; | |
837 | } | |
838 | ||
839 | @files = sort readdir(DIR) if $DO_SORT; | |
840 | ||
841 | while (defined($name = eval $NEXT_DIR_ENTRY)) | |
842 | { | |
843 | next if $name eq '.' || $name eq '..'; ## never follow these. | |
844 | ||
845 | ## create full relative pathname. | |
846 | $file = $dir eq '.' ? $name : "$dir/$name"; | |
847 | ||
848 | ## if link and skipping them, do so. | |
849 | if ($NOLINKS && -l $file) { | |
850 | warn qq/skip (symlink): $file\n/ if $WHY; | |
851 | next; | |
852 | } | |
853 | ||
854 | ## skip things unless files or directories | |
855 | unless (-f $file || -d _) { | |
856 | if ($WHY) { | |
857 | $why = (-S _ && "socket") || | |
858 | (-p _ && "pipe") || | |
859 | (-b _ && "block special")|| | |
860 | (-c _ && "char special") || "somekinda special"; | |
861 | warn qq/skip ($why): $file\n/; | |
862 | } | |
863 | next; | |
864 | } | |
865 | ||
866 | ## skip things we can't read | |
867 | unless (-r _) { | |
868 | if ($WHY) { | |
869 | $why = (-l $file) ? "follow" : "read"; | |
870 | warn qq/skip (can't $why): $file\n/; | |
871 | } | |
872 | next; | |
873 | } | |
874 | ||
875 | ## skip things that are empty | |
1c1c7f20 | 876 | unless (-s _ || -d _) { |
d444a431 TB |
877 | warn qq/skip (empty): $file\n/ if $WHY; |
878 | next; | |
879 | } | |
880 | ||
881 | ## Note file device & inode. If -xdev, skip if appropriate. | |
882 | ($dev, $inode) = (stat(_))[$STAT_DEV, $STAT_INODE]; | |
883 | if ($XDEV && defined $xdev{$dev}) { | |
884 | warn qq/skip (other device): $file\n/ if $WHY; | |
885 | next; | |
886 | } | |
887 | $id = "$dev,$inode"; | |
888 | ||
889 | ## special work for a directory | |
890 | if (-d _) { | |
891 | ## Do checks for directory file endings. | |
892 | if ($DO_DSKIP_TEST && (eval $DSKIP_TEST)) { | |
893 | warn qq/skip (-dskip): $file\n/ if $WHY; | |
894 | next; | |
895 | } | |
896 | ## do checks for -name/-regex/-path tests | |
897 | if ($DO_DGLOB_TESTS && !(eval $DGLOB_TESTS)) { | |
898 | warn qq/skip (dirname): $file\n/ if $WHY; | |
899 | next; | |
900 | } | |
901 | ||
902 | ## _never_ redo a directory | |
1c1c7f20 | 903 | if (defined $dir_done{$id} and $^O ne 'MSWin32') { |
d444a431 TB |
904 | warn qq/skip (did as "$dir_done{$id}"): $file\n/ if $WHY; |
905 | next; | |
906 | } | |
907 | $dir_done{$id} = $file; ## mark it done. | |
908 | unshift(@todo, $file); ## add to the list to do. | |
909 | next; | |
910 | } | |
911 | if ($WHY == 0 && $VERBOSE > 1) { | |
912 | if ($VERBOSE>2||$vv_print++>$VV_PRINT_COUNT||($vv_size+=-s _)>$VV_SIZE){ | |
913 | &message($file); | |
914 | $vv_print = $vv_size = 0; | |
915 | } | |
916 | } | |
917 | ||
918 | ## do time-related tests | |
919 | if ($NEWER || $OLDER) { | |
920 | $_ = (stat(_))[$STAT_MTIME]; | |
921 | if ($NEWER && $_ < $NEWER) { | |
922 | warn qq/skip (too old): $file\n/ if $WHY; | |
923 | next; | |
924 | } | |
925 | if ($OLDER && $_ > $OLDER) { | |
926 | warn qq/skip (too new): $file\n/ if $WHY; | |
927 | next; | |
928 | } | |
929 | } | |
930 | ||
931 | ## do checks for file endings | |
932 | if ($DO_SKIP_TEST && (eval $SKIP_TEST)) { | |
933 | warn qq/skip (-skip): $file\n/ if $WHY; | |
934 | next; | |
935 | } | |
936 | ||
937 | ## do checks for -name/-regex/-path tests | |
938 | if ($DO_GLOB_TESTS && !(eval $GLOB_TESTS)) { | |
939 | warn qq/skip (filename): $file\n/ if $WHY; | |
940 | next; | |
941 | } | |
942 | ||
943 | ||
944 | ## If we're not repeating files, | |
945 | ## skip this one if we've done it, or note we're doing it. | |
946 | unless ($DOREP) { | |
947 | if (defined $file_done{$id}) { | |
948 | warn qq/skip (did as "$file_done{$id}"): $file\n/ if $WHY; | |
949 | next; | |
950 | } | |
951 | $file_done{$id} = $file; | |
952 | } | |
953 | ||
954 | if ($DO_MAGIC_TESTS) { | |
955 | if (!open(FILE_IN, $file)) { | |
956 | &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT; | |
957 | warn qq/$0: can't open: $file\n/; | |
958 | next; | |
959 | } | |
960 | unless (read(FILE_IN, $magic'H, $HEADER_BYTES)) { | |
961 | &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT; | |
962 | warn qq/$0: can't read from "$file"\n/; | |
963 | close(FILE_IN); | |
964 | next; | |
965 | } | |
966 | ||
967 | eval $MAGIC_TESTS; | |
968 | if ($magic'val) { | |
969 | close(FILE_IN); | |
970 | warn qq/skip (magic): $file\n/ if $WHY; | |
971 | next; | |
972 | } | |
973 | seek(FILE_IN, 0, 0); ## reset for later <FILE_IN> | |
974 | } | |
975 | ||
976 | if ($WHY != 0 && $VERBOSE > 1) { | |
977 | if ($VERBOSE>2||$vv_print++>$VV_PRINT_COUNT||($vv_size+=-s _)>$VV_SIZE){ | |
978 | &message($file); | |
979 | $vv_print = $vv_size = 0; | |
980 | } | |
981 | } | |
982 | ||
983 | if ($DELAY) { | |
984 | sleep($DELAY); | |
985 | } | |
986 | ||
987 | if ($FIND_ONLY) { | |
988 | &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT; | |
989 | print $file, "\n"; | |
990 | $retval=0; ## we've found something | |
991 | close(FILE_IN) if $DO_MAGIC_TESTS; | |
992 | next; | |
993 | } else { | |
994 | ## if we weren't doing magic tests, file won't be open yet... | |
995 | if (!$DO_MAGIC_TESTS && !open(FILE_IN, $file)) { | |
996 | &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT; | |
997 | warn qq/$0: can't open: $file\n/; | |
998 | next; | |
999 | } | |
1000 | if ($LIST_ONLY && $CAN_USE_FAST_LISTONLY) { | |
1001 | ## | |
1002 | ## This is rather complex, but buys us a LOT when we're just | |
1003 | ## listing files and not the individual internal lines. | |
1004 | ## | |
1005 | local($size) = 4096; ## block-size in which to do reads | |
1006 | local($nl); ## will point to $_'s ending newline. | |
1007 | local($read); ## will be how many bytes read. | |
1008 | local($_) = ''; ## Starts out empty | |
1009 | local($hold); ## (see below) | |
1010 | ||
1011 | while (($read = read(FILE_IN,$_,$size,length($_)))||length($_)) | |
1012 | { | |
1013 | undef @parts; | |
1014 | ## if read a full block, but no newline, need to read more. | |
1015 | while ($read == $size && ($nl = rindex($_, "\n")) < 0) { | |
1016 | push(@parts, $_); ## save that part | |
1017 | $read = read(FILE_IN, $_, $size); ## keep trying | |
1018 | } | |
1019 | ||
1020 | ## | |
1021 | ## If we had to save parts, must now combine them together. | |
1022 | ## adjusting $nl to reflect the now-larger $_. This should | |
1023 | ## be a lot more efficient than using any kind of .= in the | |
1024 | ## loop above. | |
1025 | ## | |
1026 | if (@parts) { | |
1027 | local($lastlen) = length($_); #only need if $nl >= 0 | |
1028 | $_ = join('', @parts, $_); | |
1029 | $nl = length($_) - ($lastlen - $nl) if $nl >= 0; | |
1030 | } | |
1031 | ||
1032 | ## | |
1033 | ## If we're at the end of the file, then we can use $_ as | |
1034 | ## is. Otherwise, we need to remove the final partial-line | |
1035 | ## and save it so that it'll be at the beginning of the | |
1036 | ## next read (where the rest of the line will be laid in | |
1037 | ## right after it). $hold will be what we should save | |
1038 | ## until next time. | |
1039 | ## | |
1040 | if ($read != $size || $nl < 0) { | |
1041 | $hold = ''; | |
1042 | } else { | |
1043 | $hold = substr($_, $nl + 1); | |
1044 | substr($_, $nl + 1) = ''; | |
1045 | } | |
1046 | ||
1047 | ## | |
1048 | ## Now have a bunch of full lines in $_. Use it. | |
1049 | ## | |
1050 | if (eval $REGEX_TEST) { | |
1051 | &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT; | |
1052 | print $file, "\n"; | |
1053 | $retval=0; ## we've found something | |
1054 | ||
1055 | last; | |
1056 | } | |
1057 | ||
1058 | ## Prepare for next read.... | |
1059 | $_ = $hold; | |
1060 | } | |
1061 | ||
1062 | } else { ## else not using faster block scanning..... | |
1063 | ||
1064 | $lines_printed = 0 if $NICE; | |
1065 | while (<FILE_IN>) { | |
1066 | study; | |
1067 | next unless (eval $REGEX_TEST); | |
1068 | ||
1069 | ## | |
1070 | ## We found a matching line. | |
1071 | ## | |
1072 | $retval=0; | |
1073 | &clear_message if $VERBOSE && $STDERR_SCREWS_STDOUT; | |
1074 | if ($LIST_ONLY) { | |
1075 | print $file, "\n"; | |
1076 | last; | |
1077 | } else { | |
1078 | ## prepare to print line. | |
1079 | if ($NICE && $lines_printed++ == 0) { | |
1080 | print '-' x 70, "\n" if $NICE > 1; | |
1081 | print $file, ":\n"; | |
1082 | } | |
1083 | ||
1084 | ## | |
1085 | ## Print all the prelim stuff. This looks less efficient | |
1086 | ## than it needs to be, but that's so that when the eval | |
1087 | ## is compiled (and the tests are optimized away), the | |
1088 | ## result will be less actual PRINTs than the more natural | |
1089 | ## way of doing these tests.... | |
1090 | ## | |
1091 | if ($NICE) { | |
1092 | if ($REPORT_LINENUM) { | |
1093 | print " line $.: "; | |
1094 | } else { | |
1095 | print " "; | |
1096 | } | |
1097 | } elsif ($REPORT_LINENUM && $PREPEND_FILENAME) { | |
1098 | print "$file,:$.: "; | |
1099 | } elsif ($PREPEND_FILENAME) { | |
1100 | print "$file: "; | |
1101 | } elsif ($REPORT_LINENUM) { | |
1102 | print "$.: "; | |
1103 | } | |
1104 | print $_; | |
1105 | print "\n" unless m/\n$/; | |
1106 | } | |
1107 | } | |
1108 | print "\n" if ($NICE > 1) && $lines_printed; | |
1109 | } | |
1110 | close(FILE_IN); | |
1111 | } | |
1112 | } | |
1113 | closedir(DIR); | |
1114 | } | |
1115 | ||
1116 | __END__ | |
1117 | .00; ## finish .ig | |
1118 | ||
1119 | 'di \" finish diversion--previous line must be blank | |
1120 | .nr nl 0-1 \" fake up transition to first page again | |
1121 | .nr % 0 \" start at page 1 | |
1122 | .\"__________________NORMAL_MAN_PAGE_BELOW_________________ | |
1123 | .ll+10n | |
1124 | .TH search 1 "Dec 17, 1994" | |
1125 | .SH SEARCH | |
1126 | search \- search files (a'la grep) in a whole directory tree. | |
1127 | .SH SYNOPSIS | |
1128 | search [ grep-like and find-like options] [regex ....] | |
1129 | .SH DESCRIPTION | |
1130 | .I Search | |
1131 | is more or less a combo of 'find' and 'grep' (although the regular | |
1132 | expression flavor is that of the perl being used, which is closer to | |
1133 | egrep's than grep's). | |
1134 | ||
1135 | .I Search | |
1136 | does generally the same kind of thing that | |
1137 | .nf | |
1138 | find <blah blah> | xargs egrep <blah blah> | |
1139 | .fi | |
1140 | does, but is | |
1141 | .I much | |
1142 | more powerful and efficient (and intuitive, I think). | |
1143 | ||
1144 | This manual describes | |
1145 | .I search | |
1146 | as of version "941227.4". You can always find the latest version at | |
1147 | .nf | |
1148 | http://www.wg.omron.co.jp/~jfriedl/perl/index.html | |
1149 | .fi | |
1150 | ||
1151 | .SH "QUICK EXAMPLE" | |
1152 | Basic use is simple: | |
1153 | .nf | |
1154 | % search jeff | |
1155 | .fi | |
1156 | will search files in the current directory, and all sub directories, for | |
1157 | files that have "jeff" in them. The lines will be listed with the | |
1158 | containing file's name prepended. | |
1159 | .PP | |
1160 | If you list more than one regex, such as with | |
1161 | .nf | |
1162 | % search jeff Larry Randal+ 'Stoc?k' 'C.*son' | |
1163 | .fi | |
1164 | then a line containing any of the regexes will be listed. | |
1165 | This makes it effectively the same as | |
1166 | .nf | |
1167 | % search 'jeff|Larry|Randal+|Stoc?k|C.*son' | |
1168 | .fi | |
1169 | However, listing them separately is much more efficient (and is easier | |
1170 | to type). | |
1171 | .PP | |
1172 | Note that in the case of these examples, the | |
1173 | .B \-w | |
1174 | (list whole-words only) option would be useful. | |
1175 | .PP | |
1176 | Normally, various kinds of files are automatically removed from consideration. | |
1177 | If it has a certain ending (such as ".tar", ".Z", ".o", etc.), or if | |
1178 | the beginning of the file looks like a binary, it'll be excluded. | |
1179 | You can control exactly how this works -- see below. One quick way to | |
1180 | override this is to use the | |
1181 | .B \-all | |
1182 | option, which means to consider all the files that would normally be | |
1183 | automatically excluded. | |
1184 | Or, if you're curious, you can use | |
1185 | .B \-why | |
1186 | to have notes about what files are skipped (and why) printed to stderr. | |
1187 | ||
1188 | .SH "BASIC OVERVIEW" | |
1189 | Normally, the search starts in the current directory, considering files in | |
1190 | all subdirectories. | |
1191 | ||
1192 | You can use the | |
1193 | .I ~/.search | |
1194 | file to control ways to automatically exclude files. | |
1195 | If you don't have this file, a default one will kick in, which automatically | |
1196 | adds | |
1197 | .nf | |
1198 | -skip .o .Z .gif | |
1199 | .fi | |
1200 | (among others) to exclude those kinds of files (which you probably want to | |
1201 | skip when searching for text, as is normal). | |
1202 | Files that look to be binary will also be excluded. | |
1203 | ||
1204 | Files ending with "#" and "~" will also be excluded unless the | |
1205 | .B -x~ | |
1206 | option is given. | |
1207 | ||
1208 | You can use | |
1209 | .B -showrc | |
1210 | to show what kinds of files will normally be skipped. | |
1211 | See the section on the startup file | |
1212 | for more info. | |
1213 | ||
1214 | You can use the | |
1215 | .B -all | |
1216 | option to indicate you want to consider all files that would otherwise be | |
1217 | skipped by the startup file. | |
1218 | ||
1219 | Based upon various other flags (see "WHICH FILES TO CONSIDER" below), | |
1220 | more files might be removed from consideration. For example | |
1221 | .nf | |
1222 | -mtime 3 | |
1223 | .fi | |
1224 | will exclude files that aren't at least three days old (change the 3 to -3 | |
1225 | to exclude files that are more than three days old), while | |
1226 | .nf | |
1227 | -skip .* | |
1228 | .fi | |
1229 | would exclude any file beginning with a dot (of course, '.' and '..' are | |
1230 | special and always excluded). | |
1231 | ||
1232 | If you'd like to see what files are being excluded, and why, you can get the | |
1233 | list via the | |
1234 | .B \-why | |
1235 | option. | |
1236 | ||
1237 | If a file makes it past all the checks, it is then "considered". | |
1238 | This usually means it is grepped for the regular expressions you gave | |
1239 | on the command line. | |
1240 | ||
1241 | If any of the regexes match a line, the line is printed. | |
1242 | However, if | |
1243 | .B -list | |
1244 | is given, just the filename is printed. Or, if | |
1245 | .B -nice | |
1246 | is given, a somewhat more (human-)readable output is generated. | |
1247 | ||
1248 | If you're searching a huge tree and want to keep informed about how | |
1249 | the search is progressing, | |
1250 | .B -v | |
1251 | will print (to stderr) the current directory being searched. | |
1252 | Using | |
1253 | .B -vv | |
1254 | will also print the current file "every so often", which could be useful | |
1255 | if a directory is huge. Using | |
1256 | .B -vvv | |
1257 | will print the update with every file. | |
1258 | ||
1259 | Below is the full listing of options. | |
1260 | ||
1261 | .SH "OPTIONS TELLING *WHERE* TO SEARCH" | |
1262 | .TP | |
1263 | .BI -dir " DIR" | |
1264 | Start searching at the named directory instead of the current directory. | |
1265 | If multiple | |
1266 | .B -dir | |
1267 | arguments are given, multiple trees will be searched. | |
1268 | .TP | |
1269 | .BI -ddir " DIR" | |
1270 | Like | |
1271 | .B -dir | |
1272 | except it flushes any previous | |
1273 | .B -dir | |
1274 | directories (i.e. "-dir A -dir B -dir C" will search A, B, and C, while | |
1275 | "-dir A -ddir B -dir C" will search only B and C). This might be of use | |
1276 | in the startup file (see that section below). | |
1277 | .TP | |
1278 | .B -xdev | |
1279 | Stay on the same filesystem as the starting directory/directories. | |
1280 | .TP | |
1281 | .B -sort | |
1282 | Sort the items in a directory before processing them. | |
1283 | Normally they are processed in whatever order they happen to be read from | |
1284 | the directory. | |
1285 | .TP | |
1286 | .B -nolinks | |
1287 | Don't follow symbolic links. Normally they're followed. | |
1288 | ||
1289 | .SH "OPTIONS CONTROLLING WHICH FILES TO CONSIDER AND EXCLUDE" | |
1290 | .TP | |
1291 | .BI -mtime " NUM" | |
1292 | Only consider files that were last changed more than | |
1293 | .I NUM | |
1294 | days ago | |
1295 | (less than | |
1296 | .I NUM | |
1297 | days if | |
1298 | .I NUM | |
1299 | has '-' prepended, i.e. "-mtime -2.5" means to consider files that | |
1300 | have been changed in the last two and a half days). | |
1301 | .TP | |
1302 | .BI -older " FILE" | |
1303 | Only consider files that have not changed since | |
1304 | .I FILE | |
1305 | was last changed. | |
1306 | If there is any upper case in the "-older", "or equal" is added to the sense | |
1307 | of the test. Therefore, "search -older ./file regex" will never consider | |
1308 | "./file", while "search -Older ./file regex" will. | |
1309 | ||
1310 | If a file is a symbolic link, the time used is that of the file and not the | |
1311 | link. | |
1312 | .TP | |
1313 | .BI -newer " FILE" | |
1314 | Opposite of | |
1315 | .BR -older . | |
1316 | .TP | |
1317 | .BI -name " GLOB" | |
1318 | Only consider files that match the shell filename pattern | |
1319 | .IR GLOB . | |
1320 | The check is only done on a file's name (use | |
1321 | .B -path | |
1322 | to check the whole path, and use | |
1323 | .B -dname | |
1324 | to check directory names). | |
1325 | ||
1326 | Multiple specifications can be given by separating them with spaces, a'la | |
1327 | .nf | |
1328 | -name '*.c *.h' | |
1329 | .fi | |
1330 | to consider C source and header files. | |
1331 | If | |
1332 | .I GLOB | |
1333 | doesn't contain any special pattern characters, a '*' is prepended. | |
1334 | This last example could have been given as | |
1335 | .nf | |
1336 | -name '.c .h' | |
1337 | .fi | |
1338 | It could also be given as | |
1339 | .nf | |
1340 | -name .c -name .h | |
1341 | .fi | |
1342 | or | |
1343 | .nf | |
1344 | -name '*.c' -name '*.h' | |
1345 | .fi | |
1346 | or | |
1347 | .nf | |
1348 | -name '*.[ch]' | |
1349 | .fi | |
1350 | (among others) | |
1351 | but in this last case, you have to be sure to supply the leading '*'. | |
1352 | .TP | |
1353 | .BI -path " GLOB" | |
1354 | Like | |
1355 | .B -name | |
1356 | except the entire path is checked against the pattern. | |
1357 | .TP | |
1358 | .BI -regex " REGEX" | |
1359 | Considers files whose names (not paths) match the given perl regex | |
1360 | exactly. | |
1361 | .TP | |
1362 | .BI -iname " GLOB" | |
1363 | Case-insensitive version of | |
1364 | .BR -name . | |
1365 | .TP | |
1366 | .BI -ipath " GLOB" | |
1367 | Case-insensitive version of | |
1368 | .BR -path . | |
1369 | .TP | |
1370 | .BI -iregex " REGEX" | |
1371 | Case-insensitive version of | |
1372 | .BR -regex . | |
1373 | ||
1374 | .TP | |
1375 | .BI -dpath " GLOB" | |
1376 | Only search down directories whose path matches the given pattern (this | |
1377 | doesn't apply to the initial directory given by | |
1378 | .BI -dir , | |
1379 | of course). | |
1380 | Something like | |
1381 | .nf | |
1382 | -dir /usr/man -dpath /usr/man/man* | |
1383 | .fi | |
1384 | would completely skip | |
1385 | "/usr/man/cat1", "/usr/man/cat2", etc. | |
1386 | .TP | |
1387 | .BI -dskip " GLOB" | |
1388 | Skips directories whose name (not path) matches the given pattern. | |
1389 | Something like | |
1390 | .nf | |
1391 | -dir /usr/man -dskip cat* | |
1392 | .fi | |
1393 | would completely skip any directory in the tree whose name begins with "cat" | |
1394 | (including "/usr/man/cat1", "/usr/man/cat2", etc.). | |
1395 | .TP | |
1396 | .BI -dregex " REGEX" | |
1397 | Like | |
1398 | .BI -dpath , | |
1399 | but the pattern is a full perl regex. Note that this is quite different | |
1400 | from | |
1401 | .B -regex | |
1402 | which considers only file names (not paths). This option considers | |
1403 | full directory paths (not just names). It's much more useful this way. | |
1404 | Sorry if it's confusing. | |
1405 | .TP | |
1406 | .BI -dpath " GLOB" | |
1407 | This option exists, but is probably not very useful. It probably wants to | |
1408 | be like the '-below' or something I mention in the "TODO" section. | |
1409 | .TP | |
1410 | .BI -idpath " GLOB" | |
1411 | Case-insensitive version of | |
1412 | .BR -dpath . | |
1413 | .TP | |
1414 | .BI -idskip " GLOB" | |
1415 | Case-insensitive version of | |
1416 | .BR -dskip . | |
1417 | .TP | |
1418 | .BI -idregex " REGEX" | |
1419 | Case-insensitive version of | |
1420 | .BR -dregex . | |
1421 | .TP | |
1422 | .B -all | |
1423 | Ignore any 'magic' or 'option' lines in the startup file. | |
1424 | The effect is that all files that would otherwise be automatically | |
1425 | excluded are considered. | |
1426 | .TP | |
1427 | .BI -x SPECIAL | |
1428 | Arguments starting with | |
1429 | .B -x | |
1430 | (except | |
1431 | .BR -xdev , | |
1432 | explained elsewhere) do special interaction with the | |
1433 | .I ~/.search | |
1434 | startup file. Something like | |
1435 | .nf | |
1436 | -xflag1 -xflag2 | |
1437 | .fi | |
1438 | will turn on "flag1" and "flag2" in the startup file (and is | |
1439 | the same as "-xflag1,flag2"). You can use this to write your own | |
1440 | rules for what kinds of files are to be considered. | |
1441 | ||
1442 | For example, the internal-default startup file contains the line | |
1443 | .nf | |
1444 | <!~> option: -skip '~ #' | |
1445 | .fi | |
1446 | This means that if the | |
1447 | .B -x~ | |
1448 | flag is | |
1449 | .I not | |
1450 | seen, the option | |
1451 | .nf | |
1452 | -skip '~ #' | |
1453 | .fi | |
1454 | should be done. | |
1455 | The effect is that emacs temp and backup files are not normally | |
1456 | considered, but you can include them with the -x~ flag. | |
1457 | ||
1458 | You can write your own rules to customize | |
1459 | .I search | |
1460 | in powerful ways. See the STARTUP FILE section below. | |
1461 | .TP | |
1462 | .B -why | |
1463 | Print a message (to stderr) when and why a file is not considered. | |
1464 | ||
1465 | .SH "OPTIONS TELLING WHAT TO DO WITH FILES THAT WILL BE CONSIDERED" | |
1466 | .TP | |
1467 | .B -find | |
1468 | (you can use | |
1469 | .B -f | |
1470 | as well). | |
1471 | This option changes the basic action of | |
1472 | .IR search . | |
1473 | ||
1474 | Normally, if a file is considered, it is searched | |
1475 | for the regular expressions as described earlier. However, if this option | |
1476 | is given, the filename is printed and no searching takes place. This turns | |
1477 | .I search | |
1478 | into a 'find' of some sorts. | |
1479 | ||
1480 | In this case, no regular expressions are needed on the command line | |
1481 | (any that are there are silently ignored). | |
1482 | ||
1483 | This is not intended to be a replacement for the 'find' program, | |
1484 | but to aid | |
1485 | you in understanding just what files are getting past the exclusion checks. | |
1486 | If you really want to use it as a sort of replacement for the 'find' program, | |
1487 | you might want to use | |
1488 | .B -all | |
1489 | so that it doesn't waste time checking to see if the file is binary, etc | |
1490 | (unless you really want that, of course). | |
1491 | ||
1492 | If you use | |
1493 | .BR -find , | |
1494 | none of the "GREP-LIKE OPTIONS" (below) matter. | |
1495 | ||
1496 | As a replacement for 'find', | |
1497 | .I search | |
1498 | is probably a bit slower (or in the case of GNU find, a lot slower -- | |
1499 | GNU find is | |
1500 | .I unbelievably | |
1501 | fast). | |
1502 | However, "search -ffind" | |
1503 | might be more useful than 'find' when options such as | |
1504 | .B -skip | |
1505 | are used (at least until 'find' gets such functionality). | |
1506 | .TP | |
1507 | .B -ffind | |
1508 | (or | |
1509 | .BR -ff ) | |
1510 | A faster more 'find'-like find. Does | |
1511 | .nf | |
1512 | -find -all -dorep | |
1513 | .fi | |
1514 | .SH "GREP-LIKE OPTIONS" | |
1515 | These options control how a searched file is accessed, | |
1516 | and how things are printed. | |
1517 | .TP | |
1518 | .B -i | |
1519 | Ignore letter case when matching. | |
1520 | .TP | |
1521 | .B -w | |
1522 | Consider only whole-word matches ("whole word" as defined by perl's "\\b" | |
1523 | regex). | |
1524 | .TP | |
1525 | .B -u | |
1526 | If the regex(es) is/are simple, try to modify them so that they'll work | |
1527 | in manpage-like underlined text (i.e. like _^Ht_^Hh_^Hi_^Hs). | |
1528 | This is very rudimentary at the moment. | |
1529 | .TP | |
1530 | .B -list | |
1531 | (you can use | |
1532 | .B -l | |
1533 | too). | |
1534 | Don't print matching lines, but the names of files that contain matching | |
1535 | lines. This will likely be *much* faster, as special optimizations are | |
1536 | made -- particularly with large files. | |
1537 | .TP | |
1538 | .B -n | |
1539 | Prefix each line with its line number. | |
1540 | .TP | |
1541 | .B -nice | |
1542 | Not a grep-like option, but similar to | |
1543 | .BR -list , | |
1544 | so included here. | |
1545 | .B -nice | |
1546 | will have the output be a bit more human-readable, with matching lines printed | |
1547 | slightly indented after the filename, a'la | |
1548 | .nf | |
1549 | ||
1550 | % search foo | |
1551 | somedir/somefile: line with foo in it | |
1552 | somedir/somefile: some food for thought | |
1553 | anotherdir/x: don't be a buffoon! | |
1554 | % | |
1555 | ||
1556 | .fi | |
1557 | will become | |
1558 | .nf | |
1559 | ||
1560 | % search -nice foo | |
1561 | somedir/somefile: | |
1562 | line with foo in it | |
1563 | some food for thought | |
1564 | anotherdir/x: | |
1565 | don't be a buffoon! | |
1566 | % | |
1567 | ||
1568 | .fi | |
1569 | This option due to Lionel Cons. | |
1570 | .TP | |
1571 | .B -nnice | |
1572 | Be a bit nicer than | |
1573 | .BR -nice . | |
1574 | Prefix each file's output by a rule line, and follow with an extra blank line. | |
1575 | .TP | |
1576 | .B -h | |
1577 | Don't prepend each output line with the name of the file | |
1578 | (meaningless when | |
1579 | .B -find | |
1580 | or | |
1581 | .B -l | |
1582 | are given). | |
1583 | ||
1584 | .SH "OTHER OPTIONS" | |
1585 | .TP | |
1586 | .B -help | |
1587 | Print the usage information. | |
1588 | .TP | |
1589 | .B -version | |
1590 | Print the version information and quit. | |
1591 | .TP | |
1592 | .B -v | |
1593 | Set the level of message verbosity. | |
1594 | .B -v | |
1595 | will print a note whenever a new directory is entered. | |
1596 | .B -vv | |
1597 | will also print a note "every so often". This can be useful to see | |
1598 | what's happening when searching huge directories. | |
1599 | .B -vvv | |
1600 | will print a new note with every file. | |
1601 | .B -vvvv | |
1602 | is | |
1603 | -vvv | |
1604 | plus | |
1605 | .BR -why . | |
1606 | .TP | |
1607 | .B -e | |
1608 | This ends the options, and can be useful if the regex begins with '-'. | |
1609 | .TP | |
1610 | .B -showrc | |
1611 | Shows what is being considered in the startup file, then exits. | |
1612 | .TP | |
1613 | .B -dorep | |
1614 | Normally, an identical file won't be checked twice (even with multiple | |
1615 | hard or symbolic links). If you're just trying to do a fast | |
1616 | .BR -find , | |
1617 | the bookkeeping to remember which files have been seen is not desirable, | |
1618 | so you can eliminate the bookkeeping with this flag. | |
1619 | ||
1620 | .SH "STARTUP FILE" | |
1621 | When | |
1622 | .I search | |
1623 | starts up, it processes the directives in | |
1624 | .IR ~/.search . | |
1625 | If no such file exists, a default | |
1626 | internal version is used. | |
1627 | ||
1628 | The internal version looks like: | |
1629 | .nf | |
1630 | ||
1631 | magic: 32 : $H =~ m/[\ex00-\ex06\ex10-\ex1a\ex1c-\ex1f\ex80\exff]{2}/ | |
1632 | option: -skip '.a .COM .elc .EXE .gz .o .pbm .xbm .dvi' | |
1633 | option: -iskip '.tarz .zip .z .lzh .jpg .jpeg .gif .uu' | |
1634 | <!~> option: -skip '~ #' | |
1635 | ||
1636 | .fi | |
1637 | If you wish to create your own "~/.search", | |
1638 | you might consider copying the above, and then working from there. | |
1639 | ||
1640 | There are two kinds of directives in a startup file: "magic" and "option". | |
1641 | .RS 0n | |
1642 | .TP | |
1643 | OPTION | |
1644 | Option lines will automatically do the command-line options given. | |
1645 | For example, the line | |
1646 | .nf | |
1647 | option: -v | |
1648 | .fi | |
1649 | in your startup file will turn on -v every time, without needing to type it | |
1650 | on the command line. | |
1651 | ||
1652 | The text on the line after the "option:" directive is processed | |
1653 | like the Bourne shell, so make sure to pay attention to quoting. | |
1654 | .nf | |
1655 | option: -skip .exe .com | |
1656 | .fi | |
1657 | will give an error (".com" by itself isn't a valid option), while | |
1658 | .nf | |
1659 | option: -skip ".exe .com" | |
1660 | .fi | |
1661 | will properly include it as part of -skip's argument. | |
1662 | ||
1663 | .TP | |
1664 | MAGIC | |
1665 | Magic lines are used to determine if a file should be considered a binary | |
1666 | or not (the term "magic" refers to checking a file's magic number). These | |
1667 | are described in more detail below. | |
1668 | .RE | |
1669 | ||
1670 | Blank lines and comments (lines beginning with '#') are allowed. | |
1671 | ||
1672 | If a line begins with <...>, then it's a check to see if the | |
1673 | directive on the line should be done or not. The stuff inside the <...> | |
1674 | can contain perl's && (and), || (or), ! (not), and parens for grouping, | |
1675 | along with "flags" that might be indicated by the user with | |
1676 | .BI -x flag | |
1677 | options. | |
1678 | ||
1679 | For example, using "-xfoo" will cause "foo" to be true inside the <...> | |
1680 | blocks. Therefore, a line beginning with "<foo>" would be done only when | |
1681 | "-xfoo" had been specified, while a line beginning with "<!foo>" would be | |
1682 | done only when "-xfoo" is not specified (of course, a line without any <...> | |
1683 | is done in either case). | |
1684 | ||
1685 | A realistic example might be | |
1686 | .nf | |
1687 | <!v> option: -vv | |
1688 | .fi | |
1689 | This will cause -vv messages to be the default, but allow "-xv" to override. | |
1690 | ||
1691 | There are a few flags that are set automatically: | |
1692 | .RS | |
1693 | .TP | |
1694 | .B TTY | |
1695 | true if the output is to the screen (as opposed to being redirected to a file). | |
1696 | You can force this (as with all the other automatic flags) with -xTTY. | |
1697 | .TP | |
1698 | .B -v | |
1699 | True if -v was specified. If -vv was specified, both | |
1700 | .B -v | |
1701 | and | |
1702 | .B -vv | |
1703 | flags are true (and so on). | |
1704 | .TP | |
1705 | .B -nice | |
1706 | True if -nice was specified. Same thing about -nnice as for -vv. | |
1707 | .PP | |
1708 | .TP | |
1709 | .B -list | |
1710 | true if -list (or -l) was given. | |
1711 | .TP | |
1712 | .B -dir | |
1713 | true if -dir was given. | |
1714 | .RE | |
1715 | ||
1716 | Using this info, you might change the last example to | |
1717 | .nf | |
1718 | ||
1719 | <!v && !-v> option: -vv | |
1720 | ||
1721 | .fi | |
1722 | The added "&& !-v" means "and if the '-v' option not given". | |
1723 | This will allow you to use "-v" alone on the command line, and not | |
1724 | have this directive add the more verbose "-vv" automatically. | |
1725 | ||
1726 | .RS 0 | |
1727 | Some other examples: | |
1728 | .TP | |
1729 | <!-dir && !here> option: -dir ~/ | |
1730 | Effectively make the default directory your home directory (instead of the | |
1731 | current directory). Using -dir or -xhere will undo this. | |
1732 | .TP | |
1733 | <tex> option: -name .tex -dir ~/pub | |
1734 | Create '-xtex' to search only "*.tex" files in your ~/pub directory tree. | |
1735 | Actually, this could be made a bit better. If you combine '-xtex' and '-dir' | |
1736 | on the command line, this directive will add ~/pub to the list, when you | |
1737 | probably want to use the -dir directory only. You could do | |
1738 | .nf | |
1739 | ||
1740 | <tex> option: -name .tex | |
1741 | <tex && !-dir> option: -dir ~/pub | |
1742 | .fi | |
1743 | ||
1744 | This will allow '-xtex' to work as before, but allow a command-line "-dir" | |
1745 | to take precedence with respect to ~/pub. | |
1746 | .TP | |
1747 | <fluff> option: -nnice -sort -i -vvv | |
1748 | Combine a few user-friendly options into one '-xfluff' option. | |
1749 | .TP | |
1750 | <man> option: -ddir /usr/man -v -w | |
1751 | When the '-xman' option is given, search "/usr/man" for whole-words | |
1752 | (of whatever regex or regexes are given on the command line), with -v. | |
1753 | .RE | |
1754 | ||
1755 | The lines in the startup file are executed from top to bottom, so something | |
1756 | like | |
1757 | .nf | |
1758 | ||
1759 | <both> option: -xflag1 -xflag2 | |
1760 | <flag1> option: ...whatever... | |
1761 | <flag2> option: ...whatever... | |
1762 | ||
1763 | .fi | |
1764 | will allow '-xboth' to be the same as '-xflag1 -xflag2' (or '-xflag1,flag2' | |
1765 | for that matter). However, if you put the "<both>" line below the others, | |
1766 | they will not be true when encountered, so the result would be different | |
1767 | (and probably undesired). | |
1768 | ||
1769 | The "magic" directives are used to determine if a file looks to be binary | |
1770 | or not. The form of a magic line is | |
1771 | .nf | |
1772 | magic: \fISIZE\fP : \fIPERLCODE\fP | |
1773 | .fi | |
1774 | where | |
1775 | .I SIZE | |
1776 | is the number of bytes of the file you need to check, and | |
1777 | .I PERLCODE | |
1778 | is the code to do the check. Within | |
1779 | .IR PERLCODE , | |
1780 | the variable $H will hold at least the first | |
1781 | .I SIZE | |
1782 | bytes of the file (unless the file is shorter than that, of course). | |
1783 | It might hold more bytes. The perl should evaluate to true if the file | |
1784 | should be considered a binary. | |
1785 | ||
1786 | An example might be | |
1787 | .nf | |
1788 | magic: 6 : substr($H, 0, 6) eq 'GIF87a' | |
1789 | .fi | |
1790 | to test for a GIF ("-iskip .gif" is better, but this might be useful | |
1791 | if you have images in files without the ".gif" extension). | |
1792 | ||
1793 | Since the startup file is checked from top to bottom, you can be a bit | |
1794 | more efficient: | |
1795 | .nf | |
1796 | magic: 6 : ($x6 = substr($H, 0, 6)) eq 'GIF87a' | |
1797 | magic: 6 : $x6 eq 'GIF89a' | |
1798 | .fi | |
1799 | You could also write the same thing as | |
1800 | .nf | |
1801 | magic: 6 : (($x6 = substr($H, 0, 6)) eq 'GIF87a') || ## an old gif, or.. \e | |
1802 | $x6 eq 'GIF89a' ## .. a new one. | |
1803 | .fi | |
1804 | since newlines may be escaped. | |
1805 | ||
1806 | The default internal startup file includes | |
1807 | .nf | |
1808 | magic: 32 : $H =~ m/[\ex00-\ex06\ex10-\ex1a\ex1c-\ex1f\ex80\exff]{2}/ | |
1809 | .fi | |
1810 | which checks for certain non-printable characters, and catches a large | |
1811 | number of binary files, including most systems' executables, linkable | |
1812 | objects, compressed, tarred, and otherwise folded, spindled, and mutilated | |
1813 | files. | |
1814 | ||
1815 | Another example might be | |
1816 | .nf | |
1817 | ## an archive library | |
1818 | magic: 17 : substr($H, 0, 17) eq "!<arch>\en__.SYMDEF" | |
1819 | .fi | |
1820 | ||
1821 | .SH "RETURN VALUE" | |
1822 | .I Search | |
1823 | returns zero if lines (or files, if appropriate) were found, | |
1824 | or if no work was requested (such as with | |
1825 | .BR -help ). | |
1826 | Returns 1 if no lines (or files) were found. | |
1827 | Returns 2 on error. | |
1828 | ||
1829 | .SH TODO | |
1830 | Things I'd like to add some day: | |
1831 | .nf | |
1832 | + show surrounding lines (context). | |
1833 | + highlight matched portions of lines. | |
1834 | + add '-and', which can go between regexes to override | |
1835 | the default logical or of the regexes. | |
1836 | + add something like | |
1837 | -below GLOB | |
1838 | which will examine a tree and only consider files that | |
1839 | lie in a directory deeper than one named by the pattern. | |
1840 | + add 'warning' and 'error' directives. | |
1841 | + add 'help' directive. | |
1842 | .fi | |
1843 | .SH BUGS | |
1844 | If -xdev and multiple -dir arguments are given, any file in any of the | |
1845 | target filesystems is allowed. It would be better to allow each filesystem | |
1846 | for each separate tree. | |
1847 | ||
1848 | Multiple -dir args might also cause some confusing effects. Doing | |
1849 | .nf | |
1850 | -dir some/dir -dir other | |
1851 | .fi | |
1852 | will search "some/dir" completely, then search "other" completely. This | |
1853 | is good. However, something like | |
1854 | .nf | |
1855 | -dir some/dir -dir some/dir/more/specific | |
1856 | .fi | |
1857 | will search "some/dir" completely *except for* "some/dir/more/specific", | |
1858 | after which it will go back and search "some/dir/more/specific". Not really | |
1859 | a bug, but just sort of odd. | |
1860 | ||
1861 | File times (for -newer, etc.) of symbolic links are for the file, not the | |
1862 | link. This could cause some misunderstandings. | |
1863 | ||
1864 | Probably more. Please let me know. | |
1865 | .SH AUTHOR | |
1866 | Jeffrey Friedl, Omron Corp (jfriedl@omron.co.jp) | |
1867 | .br | |
1868 | http://www.wg.omron.co.jp/cgi-bin/j-e/jfriedl.html | |
1869 | ||
1870 | .SH "LATEST SOURCE" | |
1871 | See http://www.wg.omron.co.jp/~jfriedl/perl/index.html |