Commit | Line | Data |
---|---|---|
b38acab9 JH |
1 | package Filter::Simple; |
2 | ||
dfa18578 | 3 | use Text::Balanced ':ALL'; |
b38acab9 | 4 | |
dfa18578 JH |
5 | use vars qw{ $VERSION @EXPORT }; |
6 | ||
55909bcd | 7 | $VERSION = '0.78'; |
b38acab9 JH |
8 | |
9 | use Filter::Util::Call; | |
10 | use Carp; | |
11 | ||
dfa18578 JH |
12 | @EXPORT = qw( FILTER FILTER_ONLY ); |
13 | ||
14 | ||
b38acab9 | 15 | sub import { |
fbe2c49e | 16 | if (@_>1) { shift; goto &FILTER } |
dfa18578 | 17 | else { *{caller()."::$_"} = \&$_ foreach @EXPORT } |
fbe2c49e JH |
18 | } |
19 | ||
20 | sub FILTER (&;$) { | |
b38acab9 | 21 | my $caller = caller; |
fbe2c49e | 22 | my ($filter, $terminator) = @_; |
55909bcd | 23 | local $SIG{__WARN__} = sub{}; |
fbe2c49e | 24 | *{"${caller}::import"} = gen_filter_import($caller,$filter,$terminator); |
201f4820 | 25 | *{"${caller}::unimport"} = gen_filter_unimport($caller); |
b38acab9 JH |
26 | } |
27 | ||
dfa18578 JH |
28 | sub fail { |
29 | croak "FILTER_ONLY: ", @_; | |
30 | } | |
31 | ||
32 | my $exql = sub { | |
33 | my @bits = extract_quotelike $_[0], qr//; | |
34 | return unless $bits[0]; | |
35 | return \@bits; | |
36 | }; | |
37 | ||
38 | my $ws = qr/\s+/; | |
39 | my $id = qr/\b(?!([ysm]|q[rqxw]?|tr)\b)\w+/; | |
40 | my $EOP = qr/\n\n|\Z/; | |
41 | my $CUT = qr/\n=cut.*$EOP/; | |
42 | my $pod_or_DATA = qr/ | |
43 | ^=(?:head[1-4]|item) .*? $CUT | |
44 | | ^=pod .*? $CUT | |
45 | | ^=for .*? $EOP | |
46 | | ^=begin \s* (\S+) .*? \n=end \s* \1 .*? $EOP | |
126dd235 | 47 | | ^__(DATA|END)__\r?\n.* |
dfa18578 JH |
48 | /smx; |
49 | ||
50 | my %extractor_for = ( | |
51 | quotelike => [ $ws, $id, { MATCH => \&extract_quotelike } ], | |
52 | regex => [ $ws, $pod_or_DATA, $id, $exql ], | |
53 | string => [ $ws, $pod_or_DATA, $id, $exql ], | |
54 | code => [ $ws, { DONT_MATCH => $pod_or_DATA }, | |
55 | $id, { DONT_MATCH => \&extract_quotelike } ], | |
56 | executable => [ $ws, { DONT_MATCH => $pod_or_DATA } ], | |
57 | all => [ { MATCH => qr/(?s:.*)/ } ], | |
58 | ); | |
59 | ||
60 | my %selector_for = ( | |
61 | all => sub { my ($t)=@_; sub{ $_=$$_; $t->(@_); $_} }, | |
62 | executable=> sub { my ($t)=@_; sub{ref() ? $_=$$_ : $t->(@_); $_} }, | |
63 | quotelike => sub { my ($t)=@_; sub{ref() && do{$_=$$_; $t->(@_)}; $_} }, | |
64 | regex => sub { my ($t)=@_; | |
65 | sub{ref() or return $_; | |
66 | my ($ql,undef,$pre,$op,$ld,$pat) = @$_; | |
67 | return $_->[0] unless $op =~ /^(qr|m|s)/ | |
68 | || !$op && ($ld eq '/' || $ld eq '?'); | |
69 | $_ = $pat; | |
70 | $t->(@_); | |
71 | $ql =~ s/^(\s*\Q$op\E\s*\Q$ld\E)\Q$pat\E/$1$_/; | |
72 | return "$pre$ql"; | |
73 | }; | |
74 | }, | |
75 | string => sub { my ($t)=@_; | |
76 | sub{ref() or return $_; | |
77 | local *args = \@_; | |
78 | my ($pre,$op,$ld1,$str1,$rd1,$ld2,$str2,$rd2,$flg) = @{$_}[2..10]; | |
79 | return $_->[0] if $op =~ /^(qr|m)/ | |
80 | || !$op && ($ld1 eq '/' || $ld1 eq '?'); | |
81 | if (!$op || $op eq 'tr' || $op eq 'y') { | |
82 | local *_ = \$str1; | |
83 | $t->(@args); | |
84 | } | |
85 | if ($op =~ /^(tr|y|s)/) { | |
86 | local *_ = \$str2; | |
87 | $t->(@args); | |
88 | } | |
89 | my $result = "$pre$op$ld1$str1$rd1"; | |
90 | $result .= $ld2 if $ld1 =~ m/[[({<]/; #])}> | |
91 | $result .= "$str2$rd2$flg"; | |
92 | return $result; | |
93 | }; | |
94 | }, | |
95 | ); | |
96 | ||
97 | ||
98 | sub gen_std_filter_for { | |
99 | my ($type, $transform) = @_; | |
100 | return sub { my (@pieces, $instr); | |
55909bcd | 101 | $DB::single=1; |
dfa18578 JH |
102 | for (extract_multiple($_,$extractor_for{$type})) { |
103 | if (ref()) { push @pieces, $_; $instr=0 } | |
104 | elsif ($instr) { $pieces[-1] .= $_ } | |
105 | else { push @pieces, $_; $instr=1 } | |
106 | } | |
107 | if ($type eq 'code') { | |
108 | my $count = 0; | |
109 | local $placeholder = qr/\Q$;\E(?:\C{4})\Q$;\E/; | |
110 | my $extractor = qr/\Q$;\E(\C{4})\Q$;\E/; | |
111 | $_ = join "", | |
112 | map { ref $_ ? $;.pack('N',$count++).$; : $_ } | |
113 | @pieces; | |
114 | @pieces = grep { ref $_ } @pieces; | |
115 | $transform->(@_); | |
116 | s/$extractor/${$pieces[unpack('N',$1)]}/g; | |
117 | } | |
118 | else { | |
dfa18578 JH |
119 | my $selector = $selector_for{$type}->($transform); |
120 | $_ = join "", map $selector->(@_), @pieces; | |
121 | } | |
122 | } | |
123 | }; | |
124 | ||
125 | sub FILTER_ONLY { | |
dfa18578 JH |
126 | my $caller = caller; |
127 | while (@_ > 1) { | |
128 | my ($what, $how) = splice(@_, 0, 2); | |
129 | fail "Unknown selector: $what" | |
130 | unless exists $extractor_for{$what}; | |
131 | fail "Filter for $what is not a subroutine reference" | |
132 | unless ref $how eq 'CODE'; | |
133 | push @transforms, gen_std_filter_for($what,$how); | |
134 | } | |
135 | my $terminator = shift; | |
136 | ||
137 | my $multitransform = sub { | |
138 | foreach my $transform ( @transforms ) { | |
139 | $transform->(@_); | |
140 | } | |
141 | }; | |
142 | no warnings 'redefine'; | |
143 | *{"${caller}::import"} = | |
144 | gen_filter_import($caller,$multitransform,$terminator); | |
201f4820 | 145 | *{"${caller}::unimport"} = gen_filter_unimport($caller); |
dfa18578 JH |
146 | } |
147 | ||
148 | my $ows = qr/(?:[ \t]+|#[^\n]*)*/; | |
149 | ||
b38acab9 | 150 | sub gen_filter_import { |
fbe2c49e | 151 | my ($class, $filter, $terminator) = @_; |
201f4820 JH |
152 | my %terminator; |
153 | my $prev_import = *{$class."::import"}{CODE}; | |
b38acab9 JH |
154 | return sub { |
155 | my ($imported_class, @args) = @_; | |
dfa18578 | 156 | my $def_terminator = |
126dd235 | 157 | qr/^(?:\s*no\s+$imported_class\s*;$ows|__(?:END|DATA)__)\r?$/; |
dfa18578 | 158 | if (!defined $terminator) { |
201f4820 | 159 | $terminator{terminator} = $def_terminator; |
dfa18578 | 160 | } |
201f4820 JH |
161 | elsif (!ref $terminator || ref $terminator eq 'Regexp') { |
162 | $terminator{terminator} = $terminator; | |
dfa18578 JH |
163 | } |
164 | elsif (ref $terminator ne 'HASH') { | |
165 | croak "Terminator must be specified as scalar or hash ref" | |
166 | } | |
167 | elsif (!exists $terminator->{terminator}) { | |
201f4820 | 168 | $terminator{terminator} = $def_terminator; |
dfa18578 | 169 | } |
b38acab9 JH |
170 | filter_add( |
171 | sub { | |
dfa18578 | 172 | my ($status, $lastline); |
fbe2c49e | 173 | my $count = 0; |
b38acab9 JH |
174 | my $data = ""; |
175 | while ($status = filter_read()) { | |
fbe2c49e | 176 | return $status if $status < 0; |
201f4820 JH |
177 | if ($terminator{terminator} && |
178 | m/$terminator{terminator}/) { | |
dfa18578 | 179 | $lastline = $_; |
b38acab9 JH |
180 | last; |
181 | } | |
182 | $data .= $_; | |
fbe2c49e | 183 | $count++; |
b38acab9 JH |
184 | $_ = ""; |
185 | } | |
186 | $_ = $data; | |
dfa18578 JH |
187 | $filter->($imported_class, @args) unless $status < 0; |
188 | if (defined $lastline) { | |
201f4820 JH |
189 | if (defined $terminator{becomes}) { |
190 | $_ .= $terminator{becomes}; | |
dfa18578 JH |
191 | } |
192 | elsif ($lastline =~ $def_terminator) { | |
193 | $_ .= $lastline; | |
194 | } | |
195 | } | |
fbe2c49e | 196 | return $count; |
b38acab9 JH |
197 | } |
198 | ); | |
201f4820 JH |
199 | if ($prev_import) { |
200 | goto &$prev_import; | |
201 | } | |
202 | elsif ($class->isa('Exporter')) { | |
203 | $class->export_to_level(1,@_); | |
204 | } | |
b38acab9 JH |
205 | } |
206 | } | |
207 | ||
201f4820 JH |
208 | sub gen_filter_unimport { |
209 | my ($class) = @_; | |
210 | my $prev_unimport = *{$class."::unimport"}{CODE}; | |
211 | return sub { | |
212 | filter_del(); | |
213 | goto &$prev_unimport if $prev_unimport; | |
214 | } | |
b38acab9 JH |
215 | } |
216 | ||
217 | 1; | |
218 | ||
219 | __END__ | |
220 | ||
221 | =head1 NAME | |
222 | ||
223 | Filter::Simple - Simplified source filtering | |
224 | ||
fbe2c49e | 225 | |
b38acab9 JH |
226 | =head1 SYNOPSIS |
227 | ||
228 | # in MyFilter.pm: | |
229 | ||
230 | package MyFilter; | |
231 | ||
fbe2c49e JH |
232 | use Filter::Simple; |
233 | ||
234 | FILTER { ... }; | |
b38acab9 | 235 | |
fbe2c49e JH |
236 | # or just: |
237 | # | |
238 | # use Filter::Simple sub { ... }; | |
b38acab9 JH |
239 | |
240 | # in user's code: | |
241 | ||
242 | use MyFilter; | |
243 | ||
244 | # this code is filtered | |
245 | ||
246 | no MyFilter; | |
247 | ||
248 | # this code is not | |
249 | ||
250 | ||
251 | =head1 DESCRIPTION | |
252 | ||
253 | =head2 The Problem | |
254 | ||
255 | Source filtering is an immensely powerful feature of recent versions of Perl. | |
256 | It allows one to extend the language itself (e.g. the Switch module), to | |
257 | simplify the language (e.g. Language::Pythonesque), or to completely recast the | |
258 | language (e.g. Lingua::Romana::Perligata). Effectively, it allows one to use | |
259 | the full power of Perl as its own, recursively applied, macro language. | |
260 | ||
261 | The excellent Filter::Util::Call module (by Paul Marquess) provides a | |
262 | usable Perl interface to source filtering, but it is often too powerful | |
263 | and not nearly as simple as it could be. | |
264 | ||
265 | To use the module it is necessary to do the following: | |
266 | ||
267 | =over 4 | |
268 | ||
269 | =item 1. | |
270 | ||
271 | Download, build, and install the Filter::Util::Call module. | |
55a1c97c | 272 | (If you have Perl 5.7.1 or later, this is already done for you.) |
b38acab9 JH |
273 | |
274 | =item 2. | |
275 | ||
276 | Set up a module that does a C<use Filter::Util::Call>. | |
277 | ||
278 | =item 3. | |
279 | ||
280 | Within that module, create an C<import> subroutine. | |
281 | ||
282 | =item 4. | |
283 | ||
284 | Within the C<import> subroutine do a call to C<filter_add>, passing | |
285 | it either a subroutine reference. | |
286 | ||
287 | =item 5. | |
288 | ||
289 | Within the subroutine reference, call C<filter_read> or C<filter_read_exact> | |
290 | to "prime" $_ with source code data from the source file that will | |
291 | C<use> your module. Check the status value returned to see if any | |
292 | source code was actually read in. | |
293 | ||
294 | =item 6. | |
295 | ||
296 | Process the contents of $_ to change the source code in the desired manner. | |
297 | ||
298 | =item 7. | |
299 | ||
300 | Return the status value. | |
301 | ||
302 | =item 8. | |
303 | ||
304 | If the act of unimporting your module (via a C<no>) should cause source | |
305 | code filtering to cease, create an C<unimport> subroutine, and have it call | |
306 | C<filter_del>. Make sure that the call to C<filter_read> or | |
307 | C<filter_read_exact> in step 5 will not accidentally read past the | |
308 | C<no>. Effectively this limits source code filters to line-by-line | |
309 | operation, unless the C<import> subroutine does some fancy | |
310 | pre-pre-parsing of the source code it's filtering. | |
311 | ||
312 | =back | |
313 | ||
314 | For example, here is a minimal source code filter in a module named | |
315 | BANG.pm. It simply converts every occurrence of the sequence C<BANG\s+BANG> | |
316 | to the sequence C<die 'BANG' if $BANG> in any piece of code following a | |
317 | C<use BANG;> statement (until the next C<no BANG;> statement, if any): | |
318 | ||
319 | package BANG; | |
fbe2c49e | 320 | |
b38acab9 JH |
321 | use Filter::Util::Call ; |
322 | ||
323 | sub import { | |
324 | filter_add( sub { | |
325 | my $caller = caller; | |
326 | my ($status, $no_seen, $data); | |
327 | while ($status = filter_read()) { | |
fbe2c49e | 328 | if (/^\s*no\s+$caller\s*;\s*?$/) { |
b38acab9 JH |
329 | $no_seen=1; |
330 | last; | |
331 | } | |
332 | $data .= $_; | |
333 | $_ = ""; | |
334 | } | |
335 | $_ = $data; | |
336 | s/BANG\s+BANG/die 'BANG' if \$BANG/g | |
337 | unless $status < 0; | |
338 | $_ .= "no $class;\n" if $no_seen; | |
339 | return 1; | |
340 | }) | |
341 | } | |
342 | ||
343 | sub unimport { | |
344 | filter_del(); | |
345 | } | |
346 | ||
347 | 1 ; | |
348 | ||
7bf0340c JH |
349 | This level of sophistication puts filtering out of the reach of |
350 | many programmers. | |
b38acab9 JH |
351 | |
352 | ||
353 | =head2 A Solution | |
354 | ||
7bf0340c | 355 | The Filter::Simple module provides a simplified interface to |
b38acab9 JH |
356 | Filter::Util::Call; one that is sufficient for most common cases. |
357 | ||
358 | Instead of the above process, with Filter::Simple the task of setting up | |
359 | a source code filter is reduced to: | |
360 | ||
361 | =over 4 | |
362 | ||
363 | =item 1. | |
364 | ||
55a1c97c JH |
365 | Download and install the Filter::Simple module. |
366 | (If you have Perl 5.7.1 or later, this is already done for you.) | |
367 | ||
368 | =item 2. | |
369 | ||
fbe2c49e JH |
370 | Set up a module that does a C<use Filter::Simple> and then |
371 | calls C<FILTER { ... }>. | |
b38acab9 | 372 | |
55a1c97c | 373 | =item 3. |
b38acab9 | 374 | |
fbe2c49e JH |
375 | Within the anonymous subroutine or block that is passed to |
376 | C<FILTER>, process the contents of $_ to change the source code in | |
377 | the desired manner. | |
b38acab9 JH |
378 | |
379 | =back | |
380 | ||
381 | In other words, the previous example, would become: | |
382 | ||
383 | package BANG; | |
fbe2c49e JH |
384 | use Filter::Simple; |
385 | ||
386 | FILTER { | |
b38acab9 JH |
387 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; |
388 | }; | |
389 | ||
390 | 1 ; | |
391 | ||
55909bcd JH |
392 | Note that the source code is passed as a single string, so any regex that |
393 | uses C<^> or C<$> to detect line boundaries will need the C</m> flag. | |
b38acab9 | 394 | |
fbe2c49e JH |
395 | =head2 Disabling or changing <no> behaviour |
396 | ||
dfa18578 JH |
397 | By default, the installed filter only filters up to a line consisting of one of |
398 | the three standard source "terminators": | |
399 | ||
400 | no ModuleName; # optional comment | |
fbe2c49e | 401 | |
dfa18578 | 402 | or: |
fbe2c49e | 403 | |
dfa18578 JH |
404 | __END__ |
405 | ||
406 | or: | |
407 | ||
408 | __DATA__ | |
409 | ||
410 | but this can be altered by passing a second argument to C<use Filter::Simple> | |
411 | or C<FILTER> (just remember: there's I<no> comma after the initial block when | |
412 | you use C<FILTER>). | |
fbe2c49e JH |
413 | |
414 | That second argument may be either a C<qr>'d regular expression (which is then | |
415 | used to match the terminator line), or a defined false value (which indicates | |
dfa18578 JH |
416 | that no terminator line should be looked for), or a reference to a hash |
417 | (in which case the terminator is the value associated with the key | |
418 | C<'terminator'>. | |
fbe2c49e JH |
419 | |
420 | For example, to cause the previous filter to filter only up to a line of the | |
421 | form: | |
422 | ||
423 | GNAB esu; | |
424 | ||
425 | you would write: | |
426 | ||
427 | package BANG; | |
428 | use Filter::Simple; | |
429 | ||
430 | FILTER { | |
431 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; | |
432 | } | |
dfa18578 JH |
433 | qr/^\s*GNAB\s+esu\s*;\s*?$/; |
434 | ||
435 | or: | |
436 | ||
437 | FILTER { | |
438 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; | |
439 | } | |
440 | { terminator => qr/^\s*GNAB\s+esu\s*;\s*?$/ }; | |
fbe2c49e JH |
441 | |
442 | and to prevent the filter's being turned off in any way: | |
443 | ||
444 | package BANG; | |
445 | use Filter::Simple; | |
446 | ||
447 | FILTER { | |
448 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; | |
449 | } | |
dfa18578 JH |
450 | ""; # or: 0 |
451 | ||
452 | or: | |
453 | ||
454 | FILTER { | |
455 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; | |
456 | } | |
457 | { terminator => "" }; | |
458 | ||
55909bcd | 459 | B<Note that, no matter what you set the terminator pattern to, |
dfa18578 | 460 | the actual terminator itself I<must> be contained on a single source line.> |
fbe2c49e JH |
461 | |
462 | ||
463 | =head2 All-in-one interface | |
464 | ||
465 | Separating the loading of Filter::Simple: | |
466 | ||
467 | use Filter::Simple; | |
468 | ||
469 | from the setting up of the filtering: | |
470 | ||
471 | FILTER { ... }; | |
472 | ||
473 | is useful because it allows other code (typically parser support code | |
474 | or caching variables) to be defined before the filter is invoked. | |
475 | However, there is often no need for such a separation. | |
476 | ||
477 | In those cases, it is easier to just append the filtering subroutine and | |
478 | any terminator specification directly to the C<use> statement that loads | |
479 | Filter::Simple, like so: | |
480 | ||
481 | use Filter::Simple sub { | |
482 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; | |
483 | }; | |
484 | ||
485 | This is exactly the same as: | |
486 | ||
487 | use Filter::Simple; | |
488 | BEGIN { | |
489 | Filter::Simple::FILTER { | |
490 | s/BANG\s+BANG/die 'BANG' if \$BANG/g; | |
491 | }; | |
492 | } | |
493 | ||
494 | except that the C<FILTER> subroutine is not exported by Filter::Simple. | |
495 | ||
dfa18578 JH |
496 | |
497 | =head2 Filtering only specific components of source code | |
498 | ||
499 | One of the problems with a filter like: | |
500 | ||
501 | use Filter::Simple; | |
502 | ||
503 | FILTER { s/BANG\s+BANG/die 'BANG' if \$BANG/g }; | |
504 | ||
505 | is that it indiscriminately applies the specified transformation to | |
506 | the entire text of your source program. So something like: | |
507 | ||
508 | warn 'BANG BANG, YOU'RE DEAD'; | |
509 | BANG BANG; | |
510 | ||
511 | will become: | |
512 | ||
513 | warn 'die 'BANG' if $BANG, YOU'RE DEAD'; | |
514 | die 'BANG' if $BANG; | |
515 | ||
516 | It is very common when filtering source to only want to apply the filter | |
517 | to the non-character-string parts of the code, or alternatively to I<only> | |
518 | the character strings. | |
519 | ||
520 | Filter::Simple supports this type of filtering by automatically | |
521 | exporting the C<FILTER_ONLY> subroutine. | |
522 | ||
523 | C<FILTER_ONLY> takes a sequence of specifiers that install separate | |
524 | (and possibly multiple) filters that act on only parts of the source code. | |
525 | For example: | |
526 | ||
527 | use Filter::Simple; | |
528 | ||
529 | FILTER_ONLY | |
530 | code => sub { s/BANG\s+BANG/die 'BANG' if \$BANG/g }, | |
55909bcd | 531 | quotelike => sub { s/BANG\s+BANG/CHITTY CHITTY/g }; |
dfa18578 JH |
532 | |
533 | The C<"code"> subroutine will only be used to filter parts of the source | |
534 | code that are not quotelikes, POD, or C<__DATA__>. The C<quotelike> | |
535 | subroutine only filters Perl quotelikes (including here documents). | |
536 | ||
537 | The full list of alternatives is: | |
538 | ||
539 | =over | |
540 | ||
541 | =item C<"code"> | |
542 | ||
543 | Filters only those sections of the source code that are not quotelikes, POD, or | |
544 | C<__DATA__>. | |
545 | ||
546 | =item C<"executable"> | |
547 | ||
548 | Filters only those sections of the source code that are not POD or C<__DATA__>. | |
549 | ||
550 | =item C<"quotelike"> | |
551 | ||
552 | Filters only Perl quotelikes (as interpreted by | |
553 | C<&Text::Balanced::extract_quotelike>). | |
554 | ||
555 | =item C<"string"> | |
556 | ||
557 | Filters only the string literal parts of a Perl quotelike (i.e. the | |
558 | contents of a string literal, either half of a C<tr///>, the second | |
559 | half of an C<s///>). | |
560 | ||
561 | =item C<"regex"> | |
562 | ||
563 | Filters only the pattern literal parts of a Perl quotelike (i.e. the | |
564 | contents of a C<qr//> or an C<m//>, the first half of an C<s///>). | |
565 | ||
566 | =item C<"all"> | |
567 | ||
568 | Filters everything. Identical in effect to C<FILTER>. | |
569 | ||
570 | =back | |
571 | ||
572 | Except for C<< FILTER_ONLY code => sub {...} >>, each of | |
573 | the component filters is called repeatedly, once for each component | |
574 | found in the source code. | |
575 | ||
576 | Note that you can also apply two or more of the same type of filter in | |
577 | a single C<FILTER_ONLY>. For example, here's a simple | |
578 | macro-preprocessor that is only applied within regexes, | |
55909bcd | 579 | with a final debugging pass that prints the resulting source code: |
dfa18578 JH |
580 | |
581 | use Regexp::Common; | |
582 | FILTER_ONLY | |
583 | regex => sub { s/!\[/[^/g }, | |
584 | regex => sub { s/%d/$RE{num}{int}/g }, | |
585 | regex => sub { s/%f/$RE{num}{real}/g }, | |
586 | all => sub { print if $::DEBUG }; | |
587 | ||
588 | ||
589 | ||
590 | =head2 Filtering only the code parts of source code | |
591 | ||
592 | Most source code ceases to be grammatically correct when it is broken up | |
593 | into the pieces between string literals and regexes. So the C<'code'> | |
594 | component filter behaves slightly differently from the other partial filters | |
595 | described in the previous section. | |
596 | ||
597 | Rather than calling the specified processor on each individual piece of | |
598 | code (i.e. on the bits between quotelikes), the C<'code'> partial filter | |
599 | operates on the entire source code, but with the quotelike bits | |
600 | "blanked out". | |
601 | ||
602 | That is, a C<'code'> filter I<replaces> each quoted string, quotelike, | |
603 | regex, POD, and __DATA__ section with a placeholder. The | |
604 | delimiters of this placeholder are the contents of the C<$;> variable | |
605 | at the time the filter is applied (normally C<"\034">). The remaining | |
606 | four bytes are a unique identifier for the component being replaced. | |
607 | ||
608 | This approach makes it comparatively easy to write code preprocessors | |
609 | without worrying about the form or contents of strings, regexes, etc. | |
610 | For convenience, during a C<'code'> filtering operation, Filter::Simple | |
611 | provides a package variable (C<$Filter::Simple::placeholder>) that contains | |
612 | a pre-compiled regex that matches any placeholder. Placeholders can be | |
613 | moved and re-ordered within the source code as needed. | |
614 | ||
615 | Once the filtering has been applied, the original strings, regexes, | |
616 | POD, etc. are re-inserted into the code, by replacing each | |
617 | placeholder with the corresponding original component. | |
618 | ||
619 | For example, the following filter detects concatentated pairs of | |
620 | strings/quotelikes and reverses the order in which they are | |
621 | concatenated: | |
622 | ||
623 | package DemoRevCat; | |
624 | use Filter::Simple; | |
625 | ||
626 | FILTER_ONLY code => sub { my $ph = $Filter::Simple::placeholder; | |
627 | s{ ($ph) \s* [.] \s* ($ph) }{ $2.$1 }gx | |
628 | }; | |
629 | ||
630 | Thus, the following code: | |
631 | ||
632 | use DemoRevCat; | |
633 | ||
634 | my $str = "abc" . q(def); | |
635 | ||
636 | print "$str\n"; | |
637 | ||
638 | would become: | |
639 | ||
640 | my $str = q(def)."abc"; | |
641 | ||
642 | print "$str\n"; | |
643 | ||
644 | and hence print: | |
645 | ||
646 | defabc | |
647 | ||
648 | ||
201f4820 | 649 | =head2 Using Filter::Simple with an explicit C<import> subroutine |
55a1c97c | 650 | |
201f4820 JH |
651 | Filter::Simple generates a special C<import> subroutine for |
652 | your module (see L<"How it works">) which would normally replace any | |
653 | C<import> subroutine you might have explicitly declared. | |
55a1c97c | 654 | |
201f4820 JH |
655 | However, Filter::Simple is smart enough to notice your existing |
656 | C<import> and Do The Right Thing with it. | |
55909bcd | 657 | That is, if you explicitly define an C<import> subroutine in a package |
201f4820 JH |
658 | that's using Filter::Simple, that C<import> subroutine will still |
659 | be invoked immediately after any filter you install. | |
55a1c97c | 660 | |
201f4820 JH |
661 | The only thing you have to remember is that the C<import> subroutine |
662 | I<must> be declared I<before> the filter is installed. If you use C<FILTER> | |
663 | to install the filter: | |
55a1c97c | 664 | |
201f4820 | 665 | package Filter::TurnItUpTo11; |
55a1c97c | 666 | |
201f4820 | 667 | use Filter::Simple; |
55a1c97c | 668 | |
201f4820 JH |
669 | FILTER { s/(\w+)/\U$1/ }; |
670 | ||
671 | that will almost never be a problem, but if you install a filtering | |
672 | subroutine by passing it directly to the C<use Filter::Simple> | |
673 | statement: | |
55a1c97c | 674 | |
201f4820 | 675 | package Filter::TurnItUpTo11; |
55a1c97c | 676 | |
201f4820 | 677 | use Filter::Simple sub{ s/(\w+)/\U$1/ }; |
55a1c97c | 678 | |
201f4820 JH |
679 | then you must make sure that your C<import> subroutine appears before |
680 | that C<use> statement. | |
681 | ||
682 | ||
683 | =head2 Using Filter::Simple and Exporter together | |
684 | ||
685 | Likewise, Filter::Simple is also smart enough | |
686 | to Do The Right Thing if you use Exporter: | |
687 | ||
688 | package Switch; | |
689 | use base Exporter; | |
690 | use Filter::Simple; | |
691 | ||
692 | @EXPORT = qw(switch case); | |
693 | @EXPORT_OK = qw(given when); | |
694 | ||
695 | FILTER { $_ = magic_Perl_filter($_) } | |
696 | ||
697 | Immediately after the filter has been applied to the source, | |
698 | Filter::Simple will pass control to Exporter, so it can do its magic too. | |
699 | ||
700 | Of course, here too, Filter::Simple has to know you're using Exporter | |
701 | before it applies the filter. That's almost never a problem, but if you're | |
702 | nervous about it, you can guarantee that things will work correctly by | |
703 | ensuring that your C<use base Exporter> always precedes your | |
704 | C<use Filter::Simple>. | |
55a1c97c | 705 | |
fbe2c49e | 706 | |
b38acab9 JH |
707 | =head2 How it works |
708 | ||
fbe2c49e JH |
709 | The Filter::Simple module exports into the package that calls C<FILTER> |
710 | (or C<use>s it directly) -- such as package "BANG" in the above example -- | |
711 | two automagically constructed | |
b38acab9 JH |
712 | subroutines -- C<import> and C<unimport> -- which take care of all the |
713 | nasty details. | |
714 | ||
715 | In addition, the generated C<import> subroutine passes its own argument | |
716 | list to the filtering subroutine, so the BANG.pm filter could easily | |
717 | be made parametric: | |
718 | ||
719 | package BANG; | |
fbe2c49e JH |
720 | |
721 | use Filter::Simple; | |
722 | ||
723 | FILTER { | |
b38acab9 JH |
724 | my ($die_msg, $var_name) = @_; |
725 | s/BANG\s+BANG/die '$die_msg' if \${$var_name}/g; | |
726 | }; | |
727 | ||
728 | # and in some user code: | |
729 | ||
fbe2c49e | 730 | use BANG "BOOM", "BAM"; # "BANG BANG" becomes: die 'BOOM' if $BAM |
b38acab9 JH |
731 | |
732 | ||
fbe2c49e JH |
733 | The specified filtering subroutine is called every time a C<use BANG> is |
734 | encountered, and passed all the source code following that call, up to | |
735 | either the next C<no BANG;> (or whatever terminator you've set) or the | |
736 | end of the source file, whichever occurs first. By default, any C<no | |
737 | BANG;> call must appear by itself on a separate line, or it is ignored. | |
b38acab9 JH |
738 | |
739 | ||
740 | =head1 AUTHOR | |
741 | ||
742 | Damian Conway (damian@conway.org) | |
743 | ||
744 | =head1 COPYRIGHT | |
745 | ||
55a1c97c JH |
746 | Copyright (c) 2000-2001, Damian Conway. All Rights Reserved. |
747 | This module is free software. It may be used, redistributed | |
748 | and/or modified under the same terms as Perl itself. |