Commit | Line | Data |
---|---|---|
b38acab9 JH |
1 | package Filter::Simple; |
2 | ||
dfa18578 | 3 | use Text::Balanced ':ALL'; |
b38acab9 | 4 | |
dfa18578 JH |
5 | use vars qw{ $VERSION @EXPORT }; |
6 | ||
1b629f66 | 7 | $VERSION = '0.82'; |
b38acab9 JH |
8 | |
9 | use Filter::Util::Call; | |
10 | use Carp; | |
11 | ||
dfa18578 JH |
12 | @EXPORT = qw( FILTER FILTER_ONLY ); |
13 | ||
14 | ||
b38acab9 | 15 | sub import { |
1b629f66 RGS |
16 | if (@_>1) { shift; goto &FILTER } |
17 | else { *{caller()."::$_"} = \&$_ foreach @EXPORT } | |
b38acab9 JH |
18 | } |
19 | ||
dfa18578 | 20 | sub fail { |
1b629f66 | 21 | croak "FILTER_ONLY: ", @_; |
dfa18578 JH |
22 | } |
23 | ||
24 | my $exql = sub { | |
1b629f66 RGS |
25 | my @bits = extract_quotelike $_[0], qr//; |
26 | return unless $bits[0]; | |
27 | return \@bits; | |
dfa18578 JH |
28 | }; |
29 | ||
1b629f66 RGS |
30 | my $ncws = qr/\s+/; |
31 | my $comment = qr/(?<![\$\@%])#.*/; | |
32 | my $ws = qr/(?:$ncws|$comment)+/; | |
dfa18578 JH |
33 | my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/; |
34 | my $EOP = qr/\n\n|\Z/; | |
35 | my $CUT = qr/\n=cut.*$EOP/; | |
36 | my $pod_or_DATA = qr/ | |
1b629f66 RGS |
37 | ^=(?:head[1-4]|item) .*? $CUT |
38 | | ^=pod .*? $CUT | |
39 | | ^=for .*? $EOP | |
40 | | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP | |
41 | | ^__(DATA|END)__\r?\n.* | |
42 | /smx; | |
dfa18578 JH |
43 | |
44 | my %extractor_for = ( | |
1b629f66 RGS |
45 | quotelike => [ $ws, \&extract_variable, $id, { MATCH => \&extract_quotelike } ], |
46 | regex => [ $ws, $pod_or_DATA, $id, $exql ], | |
47 | string => [ $ws, $pod_or_DATA, $id, $exql ], | |
48 | code => [ $ws, { DONT_MATCH => $pod_or_DATA }, | |
49 | \&extract_variable, | |
50 | $id, { DONT_MATCH => \&extract_quotelike } ], | |
51 | code_no_comments | |
52 | => [ { DONT_MATCH => $comment }, | |
53 | $ncws, { DONT_MATCH => $pod_or_DATA }, | |
54 | \&extract_variable, | |
55 | $id, { DONT_MATCH => \&extract_quotelike } ], | |
56 | executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ], | |
57 | executable_no_comments | |
58 | => [ { DONT_MATCH => $comment }, | |
59 | $ncws, { DONT_MATCH => $pod_or_DATA } ], | |
60 | all => [ { MATCH => qr/(?s:.*)/ } ], | |
dfa18578 JH |
61 | ); |
62 | ||
63 | my %selector_for = ( | |
1b629f66 RGS |
64 | all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} }, |
65 | executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} }, | |
66 | quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} }, | |
67 | regex => sub { my ($t)=@_; | |
68 | sub{ref() or return $_; | |
69 | my ($ql,undef,$pre,$op,$ld,$pat) = @$_; | |
70 | return $_->[0] unless $op =~ /^(qr|m|s)/ | |
71 | || !$op && ($ld eq '/' || $ld eq '?'); | |
72 | $_ = $pat; | |
73 | $t->(@_); | |
74 | $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/; | |
75 | return "$pre$ql"; | |
76 | }; | |
77 | }, | |
78 | string => sub { my ($t)=@_; | |
79 | sub{ref() or return $_; | |
80 | local *args = \@_; | |
81 | my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10]; | |
82 | return $_->[0] if $op =~ /^(qr|m)/ | |
83 | || !$op && ($ld1 eq '/' || $ld1 eq '?'); | |
84 | if (!$op || $op eq 'tr' || $op eq 'y') { | |
85 | local *_ = \$str1; | |
86 | $t->(@args); | |
87 | } | |
88 | if ($op =~ /^(tr|y|s)/) { | |
89 | local *_ = \$str2; | |
90 | $t->(@args); | |
91 | } | |
92 | my $result = "$pre$op$ld1$str1$rd1"; | |
93 | $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}> | |
94 | $result .= "$str2$rd2$flg"; | |
95 | return $result; | |
96 | }; | |
97 | }, | |
dfa18578 JH |
98 | ); |
99 | ||
100 | ||
101 | sub gen_std_filter_for { | |
1b629f66 RGS |
102 | my ($type, $transform) = @_; |
103 | return sub { | |
104 | my $instr; | |
105 | local @components; | |
106 | for (extract_multiple($_,$extractor_for{$type})) { | |
107 | if (ref()) { push @components, $_; $instr=0 } | |
108 | elsif ($instr) { $components[-1] .= $_ } | |
109 | else { push @components, $_; $instr=1 } | |
110 | } | |
111 | if ($type =~ /^code/) { | |
112 | my $count = 0; | |
113 | local $placeholder = qr/\Q$;\E(\C{4})\Q$;\E/; | |
114 | my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/; | |
115 | $_ = join "", | |
116 | map { ref $_ ? $;.pack('N',$count++).$; : $_ } | |
117 | @components; | |
118 | @components = grep { ref $_ } @components; | |
119 | $transform->(@_); | |
120 | s/$extractor/${$components[unpack('N',$1)]}/g; | |
121 | } | |
122 | else { | |
123 | my $selector = $selector_for{$type}->($transform); | |
124 | $_ = join "", map $selector->(@_), @components; | |
125 | } | |
126 | } | |
dfa18578 JH |
127 | }; |
128 | ||
1b629f66 RGS |
129 | sub FILTER (&;$) { |
130 | my $caller = caller; | |
131 | my ($filter, $terminator) = @_; | |
132 | no warnings 'redefine'; | |
133 | *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator); | |
134 | *{"${caller}::unimport"} = gen_filter_unimport($caller); | |
135 | } | |
136 | ||
dfa18578 | 137 | sub FILTER_ONLY { |
1b629f66 RGS |
138 | my $caller = caller; |
139 | while (@_ > 1) { | |
140 | my ($what, $how) = splice(@_, 0, 2); | |
141 | fail "Unknown selector: $what" | |
142 | unless exists $extractor_for{$what}; | |
143 | fail "Filter for $what is not a subroutine reference" | |
144 | unless ref $how eq 'CODE'; | |
145 | push @transforms, gen_std_filter_for($what,$how); | |
146 | } | |
147 | my $terminator = shift; | |
148 | ||
149 | my $multitransform = sub { | |
150 | foreach my $transform ( @transforms ) { | |
151 | $transform->(@_); | |
152 | } | |
153 | }; | |
154 | no warnings 'redefine'; | |
155 | *{"${caller}::import"} = | |
156 | gen_filter_import($caller,$multitransform,$terminator); | |
157 | *{"${caller}::unimport"} = gen_filter_unimport($caller); | |
dfa18578 JH |
158 | } |
159 | ||
160 | my $ows = qr/(?:[ \t]+|#[^\n]*)*/; | |
161 | ||
b38acab9 | 162 | sub gen_filter_import { |
fbe2c49e | 163 | my ($class, $filter, $terminator) = @_; |
201f4820 JH |
164 | my %terminator; |
165 | my $prev_import = *{$class."::import"}{CODE}; | |
b38acab9 | 166 | return sub { |
1b629f66 RGS |
167 | my ($imported_class, @args) = @_; |
168 | my $def_terminator = | |
169 | qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/; | |
170 | if (!defined $terminator) { | |
171 | $terminator{terminator} = $def_terminator; | |
172 | } | |
173 | elsif (!ref $terminator || ref $terminator eq 'Regexp') { | |
174 | $terminator{terminator} = $terminator; | |
175 | } | |
176 | elsif (ref $terminator ne 'HASH') { | |
177 | croak "Terminator must be specified as scalar or hash ref" | |
178 | } | |
179 | elsif (!exists $terminator->{terminator}) { | |
180 | $terminator{terminator} = $def_terminator; | |
181 | } | |
182 | filter_add( | |
183 | sub { | |
184 | my ($status, $lastline); | |
185 | my $count = 0; | |
186 | my $data = ""; | |
187 | while ($status = filter_read()) { | |
188 | return $status if $status < 0; | |
189 | if ($terminator{terminator} && | |
190 | m/$terminator{terminator}/) { | |
191 | $lastline = $_; | |
192 | last; | |
193 | } | |
194 | $data .= $_; | |
195 | $count++; | |
196 | $_ = ""; | |
197 | } | |
198 | return $count if not $count; | |
199 | $_ = $data; | |
200 | $filter->($imported_class, @args) unless $status < 0; | |
201 | if (defined $lastline) { | |
202 | if (defined $terminator{becomes}) { | |
203 | $_ .= $terminator{becomes}; | |
204 | } | |
205 | elsif ($lastline =~ $def_terminator) { | |
206 | $_ .= $lastline; | |
207 | } | |
208 | } | |
209 | return $count; | |
210 | } | |
211 | ); | |
212 | if ($prev_import) { | |
213 | goto &$prev_import; | |
214 | } | |
215 | elsif ($class->isa('Exporter')) { | |
216 | $class->export_to_level(1,@_); | |
217 | } | |
b38acab9 JH |
218 | } |
219 | } | |
220 | ||
201f4820 | 221 | sub gen_filter_unimport { |
1b629f66 RGS |
222 | my ($class) = @_; |
223 | return sub { | |
224 | filter_del(); | |
225 | goto &$prev_unimport if $prev_unimport; | |
226 | } | |
b38acab9 JH |
227 | } |
228 | ||
229 | 1; | |
230 | ||
231 | __END__ | |
232 | ||
233 | =head1 NAME | |
234 | ||
235 | Filter::Simple - Simplified source filtering | |
236 | ||
fbe2c49e | 237 | |
b38acab9 JH |
238 | =head1 SYNOPSIS |
239 | ||
240 | # in MyFilter.pm: | |
241 | ||
1b629f66 | 242 | package MyFilter; |
b38acab9 | 243 | |
1b629f66 RGS |
244 | use Filter::Simple; |
245 | ||
246 | FILTER { ... }; | |
b38acab9 | 247 | |
1b629f66 RGS |
248 | # or just: |
249 | # | |
250 | # use Filter::Simple sub { ... }; | |
b38acab9 JH |
251 | |
252 | # in user's code: | |
253 | ||
1b629f66 | 254 | use MyFilter; |
b38acab9 | 255 | |
1b629f66 | 256 | # this code is filtered |
b38acab9 | 257 | |
1b629f66 | 258 | no MyFilter; |
b38acab9 | 259 | |
1b629f66 | 260 | # this code is not |
b38acab9 JH |
261 | |
262 | ||
263 | =head1 DESCRIPTION | |
264 | ||
265 | =head2 The Problem | |
266 | ||
267 | Source filtering is an immensely powerful feature of recent versions of Perl. | |
268 | It allows one to extend the language itself (e.g. the Switch module), to | |
269 | simplify the language (e.g. Language::Pythonesque), or to completely recast the | |
270 | language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use | |
271 | the full power of Perl as its own, recursively applied, macro language. | |
272 | ||
273 | The excellent Filter::Util::Call module (by Paul Marquess) provides a | |
274 | usable Perl interface to source filtering, but it is often too powerful | |
275 | and not nearly as simple as it could be. | |
276 | ||
277 | To use the module it is necessary to do the following: | |
278 | ||
279 | =over 4 | |
280 | ||
281 | =item 1. | |
282 | ||
283 | Download, build, and install the Filter::Util::Call module. | |
55a1c97c | 284 | (If you have Perl 5.7.1 or later, this is already done for you.) |
b38acab9 JH |
285 | |
286 | =item 2. | |
287 | ||
288 | Set up a module that does a C<use Filter::Util::Call>. | |
289 | ||
290 | =item 3. | |
291 | ||
292 | Within that module, create an C<import> subroutine. | |
293 | ||
294 | =item 4. | |
295 | ||
296 | Within the C<import> subroutine do a call to C<filter_add>, passing | |
297 | it either a subroutine reference. | |
298 | ||
299 | =item 5. | |
300 | ||
301 | Within the subroutine reference, call C<filter_read> or C<filter_read_exact> | |
302 | to "prime" $_ with source code data from the source file that will | |
303 | C<use> your module. Check the status value returned to see if any | |
304 | source code was actually read in. | |
305 | ||
306 | =item 6. | |
307 | ||
308 | Process the contents of $_ to change the source code in the desired manner. | |
309 | ||
310 | =item 7. | |
311 | ||
312 | Return the status value. | |
313 | ||
314 | =item 8. | |
315 | ||
316 | If the act of unimporting your module (via a C<no>) should cause source | |
317 | code filtering to cease, create an C<unimport> subroutine, and have it call | |
318 | C<filter_del>. Make sure that the call to C<filter_read> or | |
319 | C<filter_read_exact> in step 5 will not accidentally read past the | |
320 | C<no>. Effectively this limits source code filters to line-by-line | |
321 | operation, unless the C<import> subroutine does some fancy | |
322 | pre-pre-parsing of the source code it's filtering. | |
323 | ||
324 | =back | |
325 | ||
326 | For example, here is a minimal source code filter in a module named | |
327 | BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG> | |
328 | to the sequence C<die 'BANG' if $BANG> in any piece of code following a | |
329 | C<use BANG;> statement (until the next C<no BANG;> statement, if any): | |
330 | ||
1b629f66 | 331 | package BANG; |
fbe2c49e | 332 | |
1b629f66 RGS |
333 | use Filter::Util::Call ; |
334 | ||
335 | sub import { | |
336 | filter_add( sub { | |
337 | my $caller = caller; | |
338 | my ($status, $no_seen, $data); | |
339 | while ($status = filter_read()) { | |
340 | if (/^\s*no\s+$caller\s*;\s*?$/) { | |
341 | $no_seen=1; | |
342 | last; | |
343 | } | |
344 | $data .= $_; | |
345 | $_ = ""; | |
b38acab9 | 346 | } |
1b629f66 RGS |
347 | $_ = $data; |
348 | s/BANG\s+BANG/die 'BANG' if \$BANG/g | |
349 | unless $status < 0; | |
350 | $_ .= "no $class;\n" if $no_seen; | |
351 | return 1; | |
352 | }) | |
353 | } | |
b38acab9 | 354 | |
1b629f66 RGS |
355 | sub unimport { |
356 | filter_del(); | |
357 | } | |
b38acab9 | 358 | |
1b629f66 | 359 | 1 ; |
b38acab9 | 360 | |
7bf0340c JH |
361 | This level of sophistication puts filtering out of the reach of |
362 | many programmers. | |
b38acab9 JH |
363 | |
364 | ||
365 | =head2 A Solution | |
366 | ||
7bf0340c | 367 | The Filter::Simple module provides a simplified interface to |
b38acab9 JH |
368 | Filter::Util::Call; one that is sufficient for most common cases. |
369 | ||
370 | Instead of the above process, with Filter::Simple the task of setting up | |
371 | a source code filter is reduced to: | |
372 | ||
373 | =over 4 | |
374 | ||
375 | =item 1. | |
376 | ||
55a1c97c JH |
377 | Download and install the Filter::Simple module. |
378 | (If you have Perl 5.7.1 or later, this is already done for you.) | |
379 | ||
380 | =item 2. | |
381 | ||
fbe2c49e JH |
382 | Set up a module that does a C<use Filter::Simple> and then |
383 | calls C<FILTER { ... }>. | |
b38acab9 | 384 | |
55a1c97c | 385 | =item 3. |
b38acab9 | 386 | |
fbe2c49e JH |
387 | Within the anonymous subroutine or block that is passed to |
388 | C<FILTER>, process the contents of $_ to change the source code in | |
389 | the desired manner. | |
b38acab9 JH |
390 | |
391 | =back | |
392 | ||
393 | In other words, the previous example, would become: | |
394 | ||
1b629f66 RGS |
395 | package BANG; |
396 | use Filter::Simple; | |
397 | ||
398 | FILTER { | |
399 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; | |
400 | }; | |
b38acab9 | 401 | |
1b629f66 | 402 | 1 ; |
b38acab9 | 403 | |
55909bcd JH |
404 | Note that the source code is passed as a single string, so any regex that |
405 | uses C<^> or C<$> to detect line boundaries will need the C</m> flag. | |
b38acab9 | 406 | |
fbe2c49e JH |
407 | =head2 Disabling or changing <no> behaviour |
408 | ||
dfa18578 JH |
409 | By default, the installed filter only filters up to a line consisting of one of |
410 | the three standard source "terminators": | |
411 | ||
1b629f66 | 412 | no ModuleName; # optional comment |
fbe2c49e | 413 | |
dfa18578 | 414 | or: |
fbe2c49e | 415 | |
1b629f66 | 416 | __END__ |
dfa18578 JH |
417 | |
418 | or: | |
419 | ||
1b629f66 | 420 | __DATA__ |
dfa18578 JH |
421 | |
422 | but this can be altered by passing a second argument to C<use Filter::Simple> | |
423 | or C<FILTER> (just remember: there's I<no> comma after the initial block when | |
424 | you use C<FILTER>). | |
fbe2c49e JH |
425 | |
426 | That second argument may be either a C<qr>'d regular expression (which is then | |
427 | used to match the terminator line), or a defined false value (which indicates | |
dfa18578 JH |
428 | that no terminator line should be looked for), or a reference to a hash |
429 | (in which case the terminator is the value associated with the key | |
430 | C<'terminator'>. | |
fbe2c49e JH |
431 | |
432 | For example, to cause the previous filter to filter only up to a line of the | |
433 | form: | |
434 | ||
1b629f66 | 435 | GNAB esu; |
fbe2c49e JH |
436 | |
437 | you would write: | |
438 | ||
1b629f66 RGS |
439 | package BANG; |
440 | use Filter::Simple; | |
441 | ||
442 | FILTER { | |
443 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; | |
444 | } | |
445 | qr/^\s*GNAB\s+esu\s*;\s*?$/; | |
dfa18578 JH |
446 | |
447 | or: | |
448 | ||
1b629f66 RGS |
449 | FILTER { |
450 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; | |
451 | } | |
452 | { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ }; | |
fbe2c49e JH |
453 | |
454 | and to prevent the filter's being turned off in any way: | |
455 | ||
1b629f66 RGS |
456 | package BANG; |
457 | use Filter::Simple; | |
458 | ||
459 | FILTER { | |
460 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; | |
461 | } | |
462 | ""; # or: 0 | |
dfa18578 JH |
463 | |
464 | or: | |
465 | ||
1b629f66 RGS |
466 | FILTER { |
467 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; | |
468 | } | |
469 | { terminator => "" }; | |
dfa18578 | 470 | |
55909bcd | 471 | B<Note that, no matter what you set the terminator pattern to, |
dfa18578 | 472 | the actual terminator itself I<must> be contained on a single source line.> |
fbe2c49e JH |
473 | |
474 | ||
475 | =head2 All-in-one interface | |
476 | ||
477 | Separating the loading of Filter::Simple: | |
478 | ||
1b629f66 | 479 | use Filter::Simple; |
fbe2c49e JH |
480 | |
481 | from the setting up of the filtering: | |
482 | ||
1b629f66 | 483 | FILTER { ... }; |
fbe2c49e JH |
484 | |
485 | is useful because it allows other code (typically parser support code | |
486 | or caching variables) to be defined before the filter is invoked. | |
487 | However, there is often no need for such a separation. | |
488 | ||
489 | In those cases, it is easier to just append the filtering subroutine and | |
490 | any terminator specification directly to the C<use> statement that loads | |
491 | Filter::Simple, like so: | |
492 | ||
1b629f66 RGS |
493 | use Filter::Simple sub { |
494 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; | |
495 | }; | |
fbe2c49e JH |
496 | |
497 | This is exactly the same as: | |
498 | ||
1b629f66 RGS |
499 | use Filter::Simple; |
500 | BEGIN { | |
501 | Filter::Simple::FILTER { | |
502 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; | |
503 | }; | |
504 | } | |
fbe2c49e JH |
505 | |
506 | except that the C<FILTER> subroutine is not exported by Filter::Simple. | |
507 | ||
dfa18578 JH |
508 | |
509 | =head2 Filtering only specific components of source code | |
510 | ||
511 | One of the problems with a filter like: | |
512 | ||
1b629f66 | 513 | use Filter::Simple; |
dfa18578 | 514 | |
1b629f66 | 515 | FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g }; |
dfa18578 JH |
516 | |
517 | is that it indiscriminately applies the specified transformation to | |
518 | the entire text of your source program. So something like: | |
519 | ||
1b629f66 RGS |
520 | warn 'BANG BANG, YOU'RE DEAD'; |
521 | BANG BANG; | |
dfa18578 JH |
522 | |
523 | will become: | |
524 | ||
1b629f66 RGS |
525 | warn 'die 'BANG' if $BANG, YOU'RE DEAD'; |
526 | die 'BANG' if $BANG; | |
dfa18578 JH |
527 | |
528 | It is very common when filtering source to only want to apply the filter | |
529 | to the non-character-string parts of the code, or alternatively to I<only> | |
530 | the character strings. | |
531 | ||
532 | Filter::Simple supports this type of filtering by automatically | |
533 | exporting the C<FILTER_ONLY> subroutine. | |
534 | ||
535 | C<FILTER_ONLY> takes a sequence of specifiers that install separate | |
536 | (and possibly multiple) filters that act on only parts of the source code. | |
537 | For example: | |
538 | ||
1b629f66 | 539 | use Filter::Simple; |
dfa18578 | 540 | |
1b629f66 RGS |
541 | FILTER_ONLY |
542 | code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g }, | |
543 | quotelike => sub { s/BANG\s+BANG/CHITTY CHITTY/g }; | |
dfa18578 JH |
544 | |
545 | The C<"code"> subroutine will only be used to filter parts of the source | |
546 | code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike> | |
547 | subroutine only filters Perl quotelikes (including here documents). | |
548 | ||
549 | The full list of alternatives is: | |
550 | ||
551 | =over | |
552 | ||
553 | =item C<"code"> | |
554 | ||
555 | Filters only those sections of the source code that are not quotelikes, POD, or | |
556 | C<__DATA__>. | |
557 | ||
1b629f66 RGS |
558 | =item C<"code_no_comments"> |
559 | ||
560 | Filters only those sections of the source code that are not quotelikes, POD, | |
561 | comments, or C<__DATA__>. | |
562 | ||
dfa18578 JH |
563 | =item C<"executable"> |
564 | ||
565 | Filters only those sections of the source code that are not POD or C<__DATA__>. | |
566 | ||
1b629f66 RGS |
567 | =item C<"executable_no_comments"> |
568 | ||
569 | Filters only those sections of the source code that are not POD, comments, or C<__DATA__>. | |
570 | ||
dfa18578 JH |
571 | =item C<"quotelike"> |
572 | ||
573 | Filters only Perl quotelikes (as interpreted by | |
574 | C<&Text::Balanced::extract_quotelike>). | |
575 | ||
576 | =item C<"string"> | |
577 | ||
578 | Filters only the string literal parts of a Perl quotelike (i.e. the | |
579 | contents of a string literal, either half of a C<tr///>, the second | |
580 | half of an C<s///>). | |
581 | ||
582 | =item C<"regex"> | |
583 | ||
584 | Filters only the pattern literal parts of a Perl quotelike (i.e. the | |
585 | contents of a C<qr//> or an C<m//>, the first half of an C<s///>). | |
586 | ||
587 | =item C<"all"> | |
588 | ||
589 | Filters everything. Identical in effect to C<FILTER>. | |
590 | ||
591 | =back | |
592 | ||
593 | Except for C<< FILTER_ONLY code => sub {...} >>, each of | |
594 | the component filters is called repeatedly, once for each component | |
595 | found in the source code. | |
596 | ||
597 | Note that you can also apply two or more of the same type of filter in | |
598 | a single C<FILTER_ONLY>. For example, here's a simple | |
599 | macro-preprocessor that is only applied within regexes, | |
55909bcd | 600 | with a final debugging pass that prints the resulting source code: |
dfa18578 | 601 | |
1b629f66 RGS |
602 | use Regexp::Common; |
603 | FILTER_ONLY | |
604 | regex => sub { s/!\[/[^/g }, | |
605 | regex => sub { s/%d/$RE{num}{int}/g }, | |
606 | regex => sub { s/%f/$RE{num}{real}/g }, | |
607 | all => sub { print if $::DEBUG }; | |
dfa18578 JH |
608 | |
609 | ||
610 | ||
611 | =head2 Filtering only the code parts of source code | |
612 | ||
613 | Most source code ceases to be grammatically correct when it is broken up | |
614 | into the pieces between string literals and regexes. So the C<'code'> | |
1b629f66 RGS |
615 | and C<'code_no_comments'> component filter behave slightly differently |
616 | from the other partial filters described in the previous section. | |
dfa18578 JH |
617 | |
618 | Rather than calling the specified processor on each individual piece of | |
1b629f66 RGS |
619 | code (i.e. on the bits between quotelikes), the C<'code...'> partial |
620 | filters operate on the entire source code, but with the quotelike bits | |
621 | (and, in the case of C<'code_no_comments'>, the comments) "blanked out". | |
dfa18578 | 622 | |
1b629f66 | 623 | That is, a C<'code...'> filter I<replaces> each quoted string, quotelike, |
dfa18578 JH |
624 | regex, POD, and __DATA__ section with a placeholder. The |
625 | delimiters of this placeholder are the contents of the C<$;> variable | |
626 | at the time the filter is applied (normally C<"\034">). The remaining | |
627 | four bytes are a unique identifier for the component being replaced. | |
628 | ||
629 | This approach makes it comparatively easy to write code preprocessors | |
630 | without worrying about the form or contents of strings, regexes, etc. | |
1b629f66 RGS |
631 | |
632 | For convenience, during a C<'code...'> filtering operation, Filter::Simple | |
633 | provides a package variable (C<$Filter::Simple::placeholder>) that | |
634 | contains a pre-compiled regex that matches any placeholder...and | |
635 | captures the identifier within the placeholder. Placeholders can be | |
dfa18578 JH |
636 | moved and re-ordered within the source code as needed. |
637 | ||
1b629f66 RGS |
638 | In addition, a second package variable (C<@Filter::Simple::components>) |
639 | contains a list of the various pieces of C<$_>, as they were originally split | |
640 | up to allow placeholders to be inserted. | |
641 | ||
642 | Once the filtering has been applied, the original strings, regexes, POD, | |
643 | etc. are re-inserted into the code, by replacing each placeholder with | |
644 | the corresponding original component (from C<@components>). Note that | |
645 | this means that the C<@components> variable must be treated with extreme | |
646 | care within the filter. The C<@components> array stores the "back- | |
647 | translations" of each placeholder inserted into C<$_>, as well as the | |
648 | interstitial source code between placeholders. If the placeholder | |
649 | backtranslations are altered in C<@components>, they will be similarly | |
650 | changed when the placeholders are removed from C<$_> after the filter | |
651 | is complete. | |
dfa18578 | 652 | |
3c4b39be | 653 | For example, the following filter detects concatenated pairs of |
dfa18578 JH |
654 | strings/quotelikes and reverses the order in which they are |
655 | concatenated: | |
656 | ||
1b629f66 RGS |
657 | package DemoRevCat; |
658 | use Filter::Simple; | |
dfa18578 | 659 | |
1b629f66 RGS |
660 | FILTER_ONLY code => sub { |
661 | my $ph = $Filter::Simple::placeholder; | |
662 | s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx | |
663 | }; | |
dfa18578 JH |
664 | |
665 | Thus, the following code: | |
666 | ||
1b629f66 | 667 | use DemoRevCat; |
dfa18578 | 668 | |
1b629f66 | 669 | my $str = "abc" . q(def); |
dfa18578 | 670 | |
1b629f66 | 671 | print "$str\n"; |
dfa18578 JH |
672 | |
673 | would become: | |
674 | ||
1b629f66 | 675 | my $str = q(def)."abc"; |
dfa18578 | 676 | |
1b629f66 | 677 | print "$str\n"; |
dfa18578 JH |
678 | |
679 | and hence print: | |
680 | ||
1b629f66 | 681 | defabc |
dfa18578 JH |
682 | |
683 | ||
201f4820 | 684 | =head2 Using Filter::Simple with an explicit C<import> subroutine |
55a1c97c | 685 | |
201f4820 JH |
686 | Filter::Simple generates a special C<import> subroutine for |
687 | your module (see L<"How it works">) which would normally replace any | |
688 | C<import> subroutine you might have explicitly declared. | |
55a1c97c | 689 | |
201f4820 JH |
690 | However, Filter::Simple is smart enough to notice your existing |
691 | C<import> and Do The Right Thing with it. | |
55909bcd | 692 | That is, if you explicitly define an C<import> subroutine in a package |
201f4820 JH |
693 | that's using Filter::Simple, that C<import> subroutine will still |
694 | be invoked immediately after any filter you install. | |
55a1c97c | 695 | |
201f4820 JH |
696 | The only thing you have to remember is that the C<import> subroutine |
697 | I<must> be declared I<before> the filter is installed. If you use C<FILTER> | |
698 | to install the filter: | |
55a1c97c | 699 | |
1b629f66 | 700 | package Filter::TurnItUpTo11; |
55a1c97c | 701 | |
1b629f66 | 702 | use Filter::Simple; |
55a1c97c | 703 | |
1b629f66 RGS |
704 | FILTER { s/(\w+)/\U$1/ }; |
705 | ||
201f4820 JH |
706 | that will almost never be a problem, but if you install a filtering |
707 | subroutine by passing it directly to the C<use Filter::Simple> | |
708 | statement: | |
55a1c97c | 709 | |
1b629f66 | 710 | package Filter::TurnItUpTo11; |
55a1c97c | 711 | |
1b629f66 | 712 | use Filter::Simple sub{ s/(\w+)/\U$1/ }; |
55a1c97c | 713 | |
201f4820 JH |
714 | then you must make sure that your C<import> subroutine appears before |
715 | that C<use> statement. | |
716 | ||
717 | ||
718 | =head2 Using Filter::Simple and Exporter together | |
719 | ||
720 | Likewise, Filter::Simple is also smart enough | |
721 | to Do The Right Thing if you use Exporter: | |
722 | ||
1b629f66 RGS |
723 | package Switch; |
724 | use base Exporter; | |
725 | use Filter::Simple; | |
201f4820 | 726 | |
1b629f66 RGS |
727 | @EXPORT = qw(switch case); |
728 | @EXPORT_OK = qw(given when); | |
201f4820 | 729 | |
1b629f66 | 730 | FILTER { $_ = magic_Perl_filter($_) } |
201f4820 JH |
731 | |
732 | Immediately after the filter has been applied to the source, | |
733 | Filter::Simple will pass control to Exporter, so it can do its magic too. | |
734 | ||
735 | Of course, here too, Filter::Simple has to know you're using Exporter | |
736 | before it applies the filter. That's almost never a problem, but if you're | |
737 | nervous about it, you can guarantee that things will work correctly by | |
738 | ensuring that your C<use base Exporter> always precedes your | |
739 | C<use Filter::Simple>. | |
55a1c97c | 740 | |
fbe2c49e | 741 | |
b38acab9 JH |
742 | =head2 How it works |
743 | ||
fbe2c49e JH |
744 | The Filter::Simple module exports into the package that calls C<FILTER> |
745 | (or C<use>s it directly) -- such as package "BANG" in the above example -- | |
746 | two automagically constructed | |
b38acab9 JH |
747 | subroutines -- C<import> and C<unimport> -- which take care of all the |
748 | nasty details. | |
749 | ||
750 | In addition, the generated C<import> subroutine passes its own argument | |
751 | list to the filtering subroutine, so the BANG.pm filter could easily | |
752 | be made parametric: | |
753 | ||
1b629f66 | 754 | package BANG; |
fbe2c49e | 755 | |
1b629f66 RGS |
756 | use Filter::Simple; |
757 | ||
758 | FILTER { | |
759 | my ($die_msg, $var_name) = @_; | |
760 | s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g; | |
761 | }; | |
b38acab9 | 762 | |
1b629f66 | 763 | # and in some user code: |
b38acab9 | 764 | |
1b629f66 | 765 | use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM |
b38acab9 JH |
766 | |
767 | ||
fbe2c49e JH |
768 | The specified filtering subroutine is called every time a C<use BANG> is |
769 | encountered, and passed all the source code following that call, up to | |
770 | either the next C<no BANG;> (or whatever terminator you've set) or the | |
771 | end of the source file, whichever occurs first. By default, any C<no | |
772 | BANG;> call must appear by itself on a separate line, or it is ignored. | |
b38acab9 JH |
773 | |
774 | ||
775 | =head1 AUTHOR | |
776 | ||
777 | Damian Conway (damian@conway.org) | |
778 | ||
779 | =head1 COPYRIGHT | |
780 | ||
55a1c97c JH |
781 | Copyright (c) 2000-2001, Damian Conway. All Rights Reserved. |
782 | This module is free software. It may be used, redistributed | |
1b629f66 | 783 | and/or modified under the same terms as Perl itself. |