Commit | Line | Data |
---|---|---|
deabda19 CBW |
1 | #!/usr/bin/perl |
2 | ############################################################################## | |
3 | # Tool for using regular expressions against the contents of files in a tar | |
a27417a0 | 4 | # archive. See 'ptargrep --help' for more documentation. |
deabda19 CBW |
5 | # |
6 | ||
7 | use strict; | |
8 | use warnings; | |
9 | ||
10 | use Pod::Usage qw(pod2usage); | |
11 | use Getopt::Long qw(GetOptions); | |
12 | use Archive::Tar qw(); | |
13 | use File::Path qw(mkpath); | |
14 | ||
15 | my(%opt, $pattern); | |
16 | ||
17 | if(!GetOptions(\%opt, | |
18 | 'basename|b', | |
19 | 'ignore-case|i', | |
20 | 'list-only|l', | |
21 | 'verbose|v', | |
22 | 'help|?', | |
23 | )) { | |
24 | pod2usage(-exitval => 1, -verbose => 0); | |
25 | } | |
26 | ||
27 | ||
28 | pod2usage(-exitstatus => 0, -verbose => 2) if $opt{help}; | |
29 | ||
30 | pod2usage(-exitval => 1, -verbose => 0, | |
31 | -message => "No pattern specified", | |
32 | ) unless @ARGV; | |
33 | make_pattern( shift(@ARGV) ); | |
34 | ||
35 | pod2usage(-exitval => 1, -verbose => 0, | |
36 | -message => "No tar files specified", | |
37 | ) unless @ARGV; | |
38 | ||
39 | process_archive($_) foreach @ARGV; | |
40 | ||
41 | exit 0; | |
42 | ||
43 | ||
44 | sub make_pattern { | |
45 | my($pat) = @_; | |
46 | ||
47 | if($opt{'ignore-case'}) { | |
48 | $pattern = qr{(?im)$pat}; | |
49 | } | |
50 | else { | |
51 | $pattern = qr{(?m)$pat}; | |
52 | } | |
53 | } | |
54 | ||
55 | ||
56 | sub process_archive { | |
57 | my($filename) = @_; | |
58 | ||
59 | _log("Processing archive: $filename"); | |
60 | my $next = Archive::Tar->iter($filename); | |
61 | while( my $f = $next->() ) { | |
62 | next unless $f->is_file; | |
63 | match_file($f) if $f->size > 0; | |
64 | } | |
65 | } | |
66 | ||
67 | ||
68 | sub match_file { | |
69 | my($f) = @_; | |
70 | my $path = $f->name; | |
c465fd2f CBW |
71 | my $prefix = $f->prefix; |
72 | if (defined $prefix) { | |
73 | $path = File::Spec->catfile($prefix, $path); | |
74 | } | |
deabda19 CBW |
75 | |
76 | _log("filename: %s (%d bytes)", $path, $f->size); | |
77 | ||
78 | my $body = $f->get_content(); | |
79 | if($body !~ $pattern) { | |
80 | _log(" no match"); | |
81 | return; | |
82 | } | |
83 | ||
84 | if($opt{'list-only'}) { | |
85 | print $path, "\n"; | |
86 | return; | |
87 | } | |
88 | ||
89 | save_file($path, $body); | |
90 | } | |
91 | ||
92 | ||
93 | sub save_file { | |
94 | my($path, $body) = @_; | |
95 | ||
96 | _log(" found match - extracting"); | |
97 | my($fh); | |
98 | my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z}; | |
99 | if($dir and not $opt{basename}) { | |
100 | _log(" writing to $dir/$file"); | |
101 | $dir =~ s{\A/}{./}; | |
102 | mkpath($dir) unless -d $dir; | |
103 | open $fh, '>', "$dir/$file" or die "open($dir/$file): $!"; | |
104 | } | |
105 | else { | |
106 | _log(" writing to ./$file"); | |
107 | open $fh, '>', $file or die "open($file): $!"; | |
108 | } | |
109 | print $fh $body; | |
110 | close($fh); | |
111 | } | |
112 | ||
113 | ||
114 | sub _log { | |
115 | return unless $opt{verbose}; | |
116 | my($format, @args) = @_; | |
117 | warn sprintf($format, @args) . "\n"; | |
118 | } | |
119 | ||
120 | ||
121 | __END__ | |
122 | ||
123 | =head1 NAME | |
124 | ||
a27417a0 | 125 | ptargrep - Apply pattern matching to the contents of files in a tar archive |
deabda19 CBW |
126 | |
127 | =head1 SYNOPSIS | |
128 | ||
a27417a0 | 129 | ptargrep [options] <pattern> <tar file> ... |
deabda19 CBW |
130 | |
131 | Options: | |
132 | ||
133 | --basename|-b ignore directory paths from archive | |
134 | --ignore-case|-i do case-insensitive pattern matching | |
135 | --list-only|-l list matching filenames rather than extracting matches | |
136 | --verbose|-v write debugging message to STDERR | |
137 | --help|-? detailed help message | |
138 | ||
139 | =head1 DESCRIPTION | |
140 | ||
141 | This utility allows you to apply pattern matching to B<the contents> of files | |
142 | contained in a tar archive. You might use this to identify all files in an | |
143 | archive which contain lines matching the specified pattern and either print out | |
144 | the pathnames or extract the files. | |
145 | ||
146 | The pattern will be used as a Perl regular expression (as opposed to a simple | |
147 | grep regex). | |
148 | ||
149 | Multiple tar archive filenames can be specified - they will each be processed | |
150 | in turn. | |
151 | ||
152 | =head1 OPTIONS | |
153 | ||
154 | =over 4 | |
155 | ||
156 | =item B<--basename> (alias -b) | |
157 | ||
158 | When matching files are extracted, ignore the directory path from the archive | |
159 | and write to the current directory using the basename of the file from the | |
160 | archive. Beware: if two matching files in the archive have the same basename, | |
161 | the second file extracted will overwrite the first. | |
162 | ||
163 | =item B<--ignore-case> (alias -i) | |
164 | ||
165 | Make pattern matching case-insensitive. | |
166 | ||
167 | =item B<--list-only> (alias -l) | |
168 | ||
169 | Print the pathname of each matching file from the archive to STDOUT. Without | |
170 | this option, the default behaviour is to extract each matching file. | |
171 | ||
172 | =item B<--verbose> (alias -v) | |
173 | ||
174 | Log debugging info to STDERR. | |
175 | ||
176 | =item B<--help> (alias -?) | |
177 | ||
178 | Display this documentation. | |
179 | ||
180 | =back | |
181 | ||
182 | =head1 COPYRIGHT | |
183 | ||
184 | Copyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt> | |
185 | ||
186 | This program is free software; you can redistribute it and/or modify it | |
93e94d8a | 187 | under the same terms as Perl itself. |
deabda19 CBW |
188 | |
189 | =cut | |
190 | ||
191 | ||
192 |