Commit | Line | Data |
---|---|---|
deabda19 CBW |
1 | #!/usr/bin/perl |
2 | ############################################################################## | |
3 | # Tool for using regular expressions against the contents of files in a tar | |
a27417a0 | 4 | # archive. See 'ptargrep --help' for more documentation. |
deabda19 CBW |
5 | # |
6 | ||
cee96d52 | 7 | BEGIN { pop @INC if $INC[-1] eq '.' } |
deabda19 CBW |
8 | use strict; |
9 | use warnings; | |
10 | ||
11 | use Pod::Usage qw(pod2usage); | |
12 | use Getopt::Long qw(GetOptions); | |
13 | use Archive::Tar qw(); | |
14 | use File::Path qw(mkpath); | |
15 | ||
16 | my(%opt, $pattern); | |
17 | ||
18 | if(!GetOptions(\%opt, | |
19 | 'basename|b', | |
20 | 'ignore-case|i', | |
21 | 'list-only|l', | |
22 | 'verbose|v', | |
23 | 'help|?', | |
24 | )) { | |
25 | pod2usage(-exitval => 1, -verbose => 0); | |
26 | } | |
27 | ||
28 | ||
29 | pod2usage(-exitstatus => 0, -verbose => 2) if $opt{help}; | |
30 | ||
31 | pod2usage(-exitval => 1, -verbose => 0, | |
32 | -message => "No pattern specified", | |
33 | ) unless @ARGV; | |
34 | make_pattern( shift(@ARGV) ); | |
35 | ||
36 | pod2usage(-exitval => 1, -verbose => 0, | |
37 | -message => "No tar files specified", | |
38 | ) unless @ARGV; | |
39 | ||
40 | process_archive($_) foreach @ARGV; | |
41 | ||
42 | exit 0; | |
43 | ||
44 | ||
45 | sub make_pattern { | |
46 | my($pat) = @_; | |
47 | ||
48 | if($opt{'ignore-case'}) { | |
49 | $pattern = qr{(?im)$pat}; | |
50 | } | |
51 | else { | |
52 | $pattern = qr{(?m)$pat}; | |
53 | } | |
54 | } | |
55 | ||
56 | ||
57 | sub process_archive { | |
58 | my($filename) = @_; | |
59 | ||
60 | _log("Processing archive: $filename"); | |
61 | my $next = Archive::Tar->iter($filename); | |
62 | while( my $f = $next->() ) { | |
63 | next unless $f->is_file; | |
64 | match_file($f) if $f->size > 0; | |
65 | } | |
66 | } | |
67 | ||
68 | ||
69 | sub match_file { | |
70 | my($f) = @_; | |
71 | my $path = $f->name; | |
c465fd2f CBW |
72 | my $prefix = $f->prefix; |
73 | if (defined $prefix) { | |
74 | $path = File::Spec->catfile($prefix, $path); | |
75 | } | |
deabda19 CBW |
76 | |
77 | _log("filename: %s (%d bytes)", $path, $f->size); | |
78 | ||
79 | my $body = $f->get_content(); | |
80 | if($body !~ $pattern) { | |
81 | _log(" no match"); | |
82 | return; | |
83 | } | |
84 | ||
85 | if($opt{'list-only'}) { | |
86 | print $path, "\n"; | |
87 | return; | |
88 | } | |
89 | ||
90 | save_file($path, $body); | |
91 | } | |
92 | ||
93 | ||
94 | sub save_file { | |
95 | my($path, $body) = @_; | |
96 | ||
97 | _log(" found match - extracting"); | |
98 | my($fh); | |
99 | my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z}; | |
100 | if($dir and not $opt{basename}) { | |
101 | _log(" writing to $dir/$file"); | |
102 | $dir =~ s{\A/}{./}; | |
103 | mkpath($dir) unless -d $dir; | |
104 | open $fh, '>', "$dir/$file" or die "open($dir/$file): $!"; | |
105 | } | |
106 | else { | |
107 | _log(" writing to ./$file"); | |
108 | open $fh, '>', $file or die "open($file): $!"; | |
109 | } | |
110 | print $fh $body; | |
111 | close($fh); | |
112 | } | |
113 | ||
114 | ||
115 | sub _log { | |
116 | return unless $opt{verbose}; | |
117 | my($format, @args) = @_; | |
118 | warn sprintf($format, @args) . "\n"; | |
119 | } | |
120 | ||
121 | ||
122 | __END__ | |
123 | ||
124 | =head1 NAME | |
125 | ||
a27417a0 | 126 | ptargrep - Apply pattern matching to the contents of files in a tar archive |
deabda19 CBW |
127 | |
128 | =head1 SYNOPSIS | |
129 | ||
a27417a0 | 130 | ptargrep [options] <pattern> <tar file> ... |
deabda19 CBW |
131 | |
132 | Options: | |
133 | ||
134 | --basename|-b ignore directory paths from archive | |
135 | --ignore-case|-i do case-insensitive pattern matching | |
136 | --list-only|-l list matching filenames rather than extracting matches | |
137 | --verbose|-v write debugging message to STDERR | |
138 | --help|-? detailed help message | |
139 | ||
140 | =head1 DESCRIPTION | |
141 | ||
142 | This utility allows you to apply pattern matching to B<the contents> of files | |
143 | contained in a tar archive. You might use this to identify all files in an | |
144 | archive which contain lines matching the specified pattern and either print out | |
145 | the pathnames or extract the files. | |
146 | ||
147 | The pattern will be used as a Perl regular expression (as opposed to a simple | |
148 | grep regex). | |
149 | ||
150 | Multiple tar archive filenames can be specified - they will each be processed | |
151 | in turn. | |
152 | ||
153 | =head1 OPTIONS | |
154 | ||
155 | =over 4 | |
156 | ||
157 | =item B<--basename> (alias -b) | |
158 | ||
159 | When matching files are extracted, ignore the directory path from the archive | |
160 | and write to the current directory using the basename of the file from the | |
161 | archive. Beware: if two matching files in the archive have the same basename, | |
162 | the second file extracted will overwrite the first. | |
163 | ||
164 | =item B<--ignore-case> (alias -i) | |
165 | ||
166 | Make pattern matching case-insensitive. | |
167 | ||
168 | =item B<--list-only> (alias -l) | |
169 | ||
170 | Print the pathname of each matching file from the archive to STDOUT. Without | |
171 | this option, the default behaviour is to extract each matching file. | |
172 | ||
173 | =item B<--verbose> (alias -v) | |
174 | ||
175 | Log debugging info to STDERR. | |
176 | ||
177 | =item B<--help> (alias -?) | |
178 | ||
179 | Display this documentation. | |
180 | ||
181 | =back | |
182 | ||
183 | =head1 COPYRIGHT | |
184 | ||
185 | Copyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt> | |
186 | ||
187 | This program is free software; you can redistribute it and/or modify it | |
93e94d8a | 188 | under the same terms as Perl itself. |
deabda19 CBW |
189 | |
190 | =cut | |
191 | ||
192 | ||
193 |