This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Let state sub fwd decls and nested subs work in anons
[perl5.git] / x2p / s2p.PL
CommitLineData
86a59229 1#!/usr/bin/perl
4633a7c4
LW
2
3use Config;
4use File::Basename qw(&basename &dirname);
8a5546a1 5use Cwd;
231bc313
JH
6use subs qw(link);
7
6ef9d486 8sub link { # This is a cut-down version of installperl:link().
231bc313
JH
9 my($from,$to) = @_;
10 my($success) = 0;
11
12 eval {
13 CORE::link($from, $to)
14 ? $success++
15 : ($from =~ m#^/afs/# || $to =~ m#^/afs/#)
16 ? die "AFS" # okay inside eval {}
17 : die "Couldn't link $from to $to: $!\n";
18 };
19 if ($@) {
231bc313
JH
20 require File::Copy;
21 File::Copy::copy($from, $to)
22 ? $success++
23 : warn "Couldn't copy $from to $to: $!\n";
24 }
25 $success;
26}
4633a7c4
LW
27
28# List explicitly here the variables you want Configure to
29# generate. Metaconfig only looks for shell variables, so you
30# have to mention them as if they were shell variables, not
31# %Config entries. Thus you write
32# $startperl
33# to ensure Configure will look for $Config{startperl}.
34
35# This forces PL files to create target in same directory as PL file.
36# This is so that make depend always knows where to find PL derivatives.
8a5546a1 37$origdir = cwd;
44a8e56a 38chdir dirname($0);
39$file = basename($0, '.PL');
774d564b 40$file .= '.com' if $^O eq 'VMS';
4633a7c4
LW
41
42open OUT,">$file" or die "Can't create $file: $!";
43
44print "Extracting $file (with variable substitutions)\n";
45
46# In this section, perl variables will be expanded during extraction.
47# You can use $Config{...} to use Configure variables.
48
49print OUT <<"!GROK!THIS!";
5f05dabc 50$Config{startperl}
51 eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}'
52 if \$running_under_some_shell;
c0393c90
JH
53my \$startperl;
54my \$perlpath;
ed6d8ea1
JH
55(\$startperl = <<'/../') =~ s/\\s*\\z//;
56$Config{startperl}
57/../
58(\$perlpath = <<'/../') =~ s/\\s*\\z//;
59$Config{perlpath}
60/../
a687059c
LW
61!GROK!THIS!
62
4633a7c4
LW
63# In the following, perl variables are not expanded during extraction.
64
65print OUT <<'!NO!SUBS!';
a687059c 66
9ae2b9a0 67$0 =~ s/^.*?(\w+)[\.\w]*$/$1/;
86a59229
WL
68
69# (p)sed - a stream editor
70# History: Aug 12 2000: Original version.
d16f50bd 71# Mar 25 2002: Rearrange generated Perl program.
e2f73067 72# Jul 23 2007: Fix bug in regex stripping (M.Thorland)
86a59229
WL
73
74use strict;
75use integer;
76use Symbol;
8d063cd8 77
d83e3bda
JM
78=head1 NAME
79
ba34211e 80psed - a stream editor
d83e3bda
JM
81
82=head1 SYNOPSIS
83
ba34211e
JH
84 psed [-an] script [file ...]
85 psed [-an] [-e script] [-f script-file] [file ...]
86
87 s2p [-an] [-e script] [-f script-file]
d83e3bda
JM
88
89=head1 DESCRIPTION
90
86a59229
WL
91A stream editor reads the input stream consisting of the specified files
92(or standard input, if none are given), processes is line by line by
93applying a script consisting of edit commands, and writes resulting lines
c2b27382 94to standard output. The filename 'C<->' may be used to read standard input.
86a59229
WL
95
96The edit script is composed from arguments of B<-e> options and
97script-files, in the given order. A single script argument may be specified
98as the first parameter.
99
100If this program is invoked with the name F<s2p>, it will act as a
f7964948 101sed-to-Perl translator. See L<"SED SCRIPT TRANSLATION">.
d83e3bda 102
86a59229 103B<sed> returns an exit code of 0 on success or >0 if an error occurred.
d83e3bda 104
86a59229 105=head1 OPTIONS
d83e3bda 106
86a59229 107=over 4
d83e3bda 108
86a59229 109=item B<-a>
d83e3bda 110
86a59229
WL
111A file specified as argument to the B<w> edit command is by default
112opened before input processing starts. Using B<-a>, opening of such
113files is delayed until the first line is actually written to the file.
114
115=item B<-e> I<script>
116
117The editing commands defined by I<script> are appended to the script.
118Multiple commands must be separated by newlines.
119
120=item B<-f> I<script-file>
121
122Editing commands from the specified I<script-file> are read and appended
123to the script.
d83e3bda
JM
124
125=item B<-n>
126
86a59229
WL
127By default, a line is written to standard output after the editing script
128has been applied to it. The B<-n> option suppresses automatic printing.
129
130=back
131
132=head1 COMMANDS
133
134B<sed> command syntax is defined as
135
136Z<> Z<> Z<> Z<>[I<address>[B<,>I<address>]][B<!>]I<function>[I<argument>]
137
138with whitespace being permitted before or after addresses, and between
139the function character and the argument. The I<address>es and the
140address inverter (C<!>) are used to restrict the application of a
141command to the selected line(s) of input.
142
143Each command must be on a line of its own, except where noted in
144the synopses below.
145
146The edit cycle performed on each input line consist of reading the line
147(without its trailing newline character) into the I<pattern space>,
148applying the applicable commands of the edit script, writing the final
149contents of the pattern space and a newline to the standard output.
150A I<hold space> is provided for saving the contents of the
151pattern space for later use.
152
153=head2 Addresses
154
155A sed address is either a line number or a pattern, which may be combined
156arbitrarily to construct ranges. Lines are numbered across all input files.
157
c2b27382 158Any address may be followed by an exclamation mark ('C<!>'), selecting
86a59229 159all lines not matching that address.
d83e3bda 160
86a59229 161=over 4
d83e3bda 162
86a59229
WL
163=item I<number>
164
165The line with the given number is selected.
166
167=item B<$>
168
169A dollar sign (C<$>) is the line number of the last line of the input stream.
170
171=item B</>I<regular expression>B</>
172
173A pattern address is a basic regular expression (see
f7964948 174L<"BASIC REGULAR EXPRESSIONS">), between the delimiting character C</>.
86a59229
WL
175Any other character except C<\> or newline may be used to delimit a
176pattern address when the initial delimiter is prefixed with a
c2b27382 177backslash ('C<\>').
d83e3bda
JM
178
179=back
180
86a59229 181If no address is given, the command selects every line.
d83e3bda 182
86a59229
WL
183If one address is given, it selects the line (or lines) matching the
184address.
d83e3bda 185
86a59229
WL
186Two addresses select a range that begins whenever the first address
187matches, and ends (including that line) when the second address matches.
188If the first (second) address is a matching pattern, the second
189address is not applied to the very same line to determine the end of
190the range. Likewise, if the second address is a matching pattern, the
191first address is not applied to the very same line to determine the
192begin of another range. If both addresses are line numbers,
193and the second line number is less than the first line number, then
194only the first line is selected.
d83e3bda 195
d83e3bda 196
86a59229 197=head2 Functions
d83e3bda 198
86a59229
WL
199The maximum permitted number of addresses is indicated with each
200function synopsis below.
d83e3bda 201
86a59229
WL
202The argument I<text> consists of one or more lines following the command.
203Embedded newlines in I<text> must be preceded with a backslash. Other
204backslashes in I<text> are deleted and the following character is taken
205literally.
d83e3bda 206
86a59229 207=over 4
d83e3bda 208
86a59229
WL
209=cut
210
211my %ComTab;
d16f50bd 212my %GenKey;
86a59229
WL
213#--------------------------------------------------------------------------
214$ComTab{'a'}=[ 1, 'txt', \&Emit, '{ push( @Q, <<'."'TheEnd' ) }\n" ]; #ok
215
216=item [1addr]B<a\> I<text>
217
218Write I<text> (which must start on the line following the command)
219to standard output immediately before reading the next line
220of input, either by executing the B<N> function or by beginning a new cycle.
221
222=cut
223
224#--------------------------------------------------------------------------
225$ComTab{'b'}=[ 2, 'str', \&Branch, '{ goto XXX; }' ]; #ok
226
227=item [2addr]B<b> [I<label>]
228
229Branch to the B<:> function with the specified I<label>. If no label
230is given, branch to the end of the script.
231
232=cut
233
234#--------------------------------------------------------------------------
235$ComTab{'c'}=[ 2, 'txt', \&Change, <<'-X-' ]; #ok
236{ print <<'TheEnd'; } $doPrint = 0; goto EOS;
237-X-
238### continue OK => next CYCLE;
239
240=item [2addr]B<c\> I<text>
241
242The line, or range of lines, selected by the address is deleted.
243The I<text> (which must start on the line following the command)
244is written to standard output. With an address range, this occurs at
245the end of the range.
246
247=cut
248
249#--------------------------------------------------------------------------
250$ComTab{'d'}=[ 2, '', \&Emit, <<'-X-' ]; #ok
251{ $doPrint = 0;
252 goto EOS;
253}
254-X-
255### continue OK => next CYCLE;
256
257=item [2addr]B<d>
258
259Deletes the pattern space and starts the next cycle.
260
261=cut
262
263#--------------------------------------------------------------------------
264$ComTab{'D'}=[ 2, '', \&Emit, <<'-X-' ]; #ok
265{ s/^.*\n?//;
266 if(length($_)){ goto BOS } else { goto EOS }
267}
268-X-
269### continue OK => next CYCLE;
270
271=item [2addr]B<D>
272
273Deletes the pattern space through the first embedded newline or to the end.
274If the pattern space becomes empty, a new cycle is started, otherwise
275execution of the script is restarted.
276
277=cut
278
279#--------------------------------------------------------------------------
280$ComTab{'g'}=[ 2, '', \&Emit, '{ $_ = $Hold };' ]; #ok
281
282=item [2addr]B<g>
283
284Replace the contents of the pattern space with the hold space.
285
286=cut
287
288#--------------------------------------------------------------------------
289$ComTab{'G'}=[ 2, '', \&Emit, '{ $_ .= "\n"; $_ .= $Hold };' ]; #ok
290
291=item [2addr]B<G>
292
293Append a newline and the contents of the hold space to the pattern space.
294
295=cut
296
297#--------------------------------------------------------------------------
298$ComTab{'h'}=[ 2, '', \&Emit, '{ $Hold = $_ }' ]; #ok
299
300=item [2addr]B<h>
301
302Replace the contents of the hold space with the pattern space.
303
304=cut
305
306#--------------------------------------------------------------------------
307$ComTab{'H'}=[ 2, '', \&Emit, '{ $Hold .= "\n"; $Hold .= $_; }' ]; #ok
308
309=item [2addr]B<H>
310
311Append a newline and the contents of the pattern space to the hold space.
312
313=cut
314
315#--------------------------------------------------------------------------
316$ComTab{'i'}=[ 1, 'txt', \&Emit, '{ print <<'."'TheEnd' }\n" ]; #ok
317
318=item [1addr]B<i\> I<text>
319
320Write the I<text> (which must start on the line following the command)
321to standard output.
322
323=cut
324
325#--------------------------------------------------------------------------
326$ComTab{'l'}=[ 2, '', \&Emit, '{ _l() }' ]; #okUTF8
327
328=item [2addr]B<l>
329
330Print the contents of the pattern space: non-printable characters are
331shown in C-style escaped form; long lines are split and have a trailing
c2b27382
JK
332^'C<\>' at the point of the split; the true end of a line is marked with
333a 'C<$>'. Escapes are: '\a', '\t', '\n', '\f', '\r', '\e' for
334BEL, HT, LF, FF, CR, ESC, respectively, and '\' followed by a three-digit
86a59229
WL
335octal number for all other non-printable characters.
336
337=cut
338
339#--------------------------------------------------------------------------
340$ComTab{'n'}=[ 2, '', \&Emit, <<'-X-' ]; #ok
341{ print $_, "\n" if $doPrint;
d16f50bd 342 printQ() if @Q;
86a59229
WL
343 $CondReg = 0;
344 last CYCLE unless getsARGV();
345 chomp();
346}
347-X-
348
349=item [2addr]B<n>
350
351If automatic printing is enabled, write the pattern space to the standard
352output. Replace the pattern space with the next line of input. If
353there is no more input, processing is terminated.
354
355=cut
356
357#--------------------------------------------------------------------------
358$ComTab{'N'}=[ 2, '', \&Emit, <<'-X-' ]; #ok
d16f50bd 359{ printQ() if @Q;
86a59229
WL
360 $CondReg = 0;
361 last CYCLE unless getsARGV( $h );
362 chomp( $h );
363 $_ .= "\n$h";
364}
365-X-
366
367=item [2addr]B<N>
368
369Append a newline and the next line of input to the pattern space. If
370there is no more input, processing is terminated.
371
372=cut
373
374#--------------------------------------------------------------------------
375$ComTab{'p'}=[ 2, '', \&Emit, '{ print $_, "\n"; }' ]; #ok
376
377=item [2addr]B<p>
378
379Print the pattern space to the standard output. (Use the B<-n> option
380to suppress automatic printing at the end of a cycle if you want to
381avoid double printing of lines.)
382
383=cut
384
385#--------------------------------------------------------------------------
386$ComTab{'P'}=[ 2, '', \&Emit, <<'-X-' ]; #ok
387{ if( /^(.*)/ ){ print $1, "\n"; } }
388-X-
389
390=item [2addr]B<P>
391
392Prints the pattern space through the first embedded newline or to the end.
393
394=cut
395
396#--------------------------------------------------------------------------
397$ComTab{'q'}=[ 1, '', \&Emit, <<'-X-' ]; #ok
398{ print $_, "\n" if $doPrint;
399 last CYCLE;
400}
401-X-
402
403=item [1addr]B<q>
404
405Branch to the end of the script and quit without starting a new cycle.
406
407=cut
408
409#--------------------------------------------------------------------------
410$ComTab{'r'}=[ 1, 'str', \&Emit, "{ _r( '-X-' ) }" ]; #ok
86a59229
WL
411
412=item [1addr]B<r> I<file>
413
414Copy the contents of the I<file> to standard output immediately before
415the next attempt to read a line of input. Any error encountered while
416reading I<file> is silently ignored.
417
418=cut
419
420#--------------------------------------------------------------------------
421$ComTab{'s'}=[ 2, 'sub', \&Emit, '' ]; #ok
422
423=item [2addr]B<s/>I<regular expression>B</>I<replacement>B</>I<flags>
424
425Substitute the I<replacement> string for the first substring in
426the pattern space that matches the I<regular expression>.
427Any character other than backslash or newline can be used instead of a
428slash to delimit the regular expression and the replacement.
429To use the delimiter as a literal character within the regular expression
c2b27382 430and the replacement, precede the character by a backslash ('C<\>').
86a59229
WL
431
432Literal newlines may be embedded in the replacement string by
433preceding a newline with a backslash.
434
c2b27382
JK
435Within the replacement, an ampersand ('C<&>') is replaced by the string
436matching the regular expression. The strings 'C<\1>' through 'C<\9>' are
f7964948 437replaced by the corresponding subpattern (see L<"BASIC REGULAR EXPRESSIONS">).
c2b27382 438To get a literal 'C<&>' or 'C<\>' in the replacement text, precede it
86a59229
WL
439by a backslash.
440
441The following I<flags> modify the behaviour of the B<s> command:
442
443=over 8
444
445=item B<g>
446
447The replacement is performed for all matching, non-overlapping substrings
448of the pattern space.
449
450=item B<1>..B<9>
451
452Replace only the n-th matching substring of the pattern space.
453
454=item B<p>
455
456If the substitution was made, print the new value of the pattern space.
457
458=item B<w> I<file>
459
460If the substitution was made, write the new value of the pattern space
461to the specified file.
462
463=back
464
465=cut
466
467#--------------------------------------------------------------------------
468$ComTab{'t'}=[ 2, 'str', \&Branch, '{ goto XXX if _t() }' ]; #ok
469
470=item [2addr]B<t> [I<label>]
471
472Branch to the B<:> function with the specified I<label> if any B<s>
473substitutions have been made since the most recent reading of an input line
474or execution of a B<t> function. If no label is given, branch to the end of
475the script.
476
477
478=cut
479
480#--------------------------------------------------------------------------
481$ComTab{'w'}=[ 2, 'str', \&Write, "{ _w( '-X-' ) }" ]; #ok
482
483=item [2addr]B<w> I<file>
484
485The contents of the pattern space are written to the I<file>.
486
487=cut
488
489#--------------------------------------------------------------------------
490$ComTab{'x'}=[ 2, '', \&Emit, '{ ($Hold, $_) = ($_, $Hold) }' ]; #ok
491
492=item [2addr]B<x>
493
494Swap the contents of the pattern space and the hold space.
495
496=cut
497
498#--------------------------------------------------------------------------
499$ComTab{'y'}=[ 2, 'tra', \&Emit, '' ]; #ok
5f3a9e9e 500
86a59229
WL
501=item [2addr]B<y>B</>I<string1>B</>I<string2>B</>
502
2a467c83 503In the pattern space, replace all characters occurring in I<string1> by the
86a59229
WL
504character at the corresponding position in I<string2>. It is possible
505to use any character (other than a backslash or newline) instead of a
506slash to delimit the strings. Within I<string1> and I<string2>, a
507backslash followed by any character other than a newline is that literal
c2b27382 508character, and a backslash followed by an 'n' is replaced by a newline
86a59229
WL
509character.
510
511=cut
d83e3bda 512
86a59229
WL
513#--------------------------------------------------------------------------
514$ComTab{'='}=[ 1, '', \&Emit, '{ print "$.\n" }' ]; #ok
515
516=item [1addr]B<=>
517
518Prints the current line number on the standard output.
519
520=cut
521
522#--------------------------------------------------------------------------
523$ComTab{':'}=[ 0, 'str', \&Label, '' ]; #ok
5f3a9e9e 524
86a59229
WL
525=item [0addr]B<:> [I<label>]
526
527The command specifies the position of the I<label>. It has no other effect.
528
529=cut
530
531#--------------------------------------------------------------------------
532$ComTab{'{'}=[ 2, '', \&BeginBlock, '{' ]; #ok
533$ComTab{'}'}=[ 0, '', \&EndBlock, ';}' ]; #ok
534# ';' to avoid warning on empty {}-block
535
536=item [2addr]B<{> [I<command>]
537
538=item [0addr]B<}>
539
540These two commands begin and end a command list. The first command may
541be given on the same line as the opening B<{> command. The commands
542within the list are jointly selected by the address(es) given on the
543B<{> command (but may still have individual addresses).
544
545=cut
546
547#--------------------------------------------------------------------------
548$ComTab{'#'}=[ 0, 'str', \&Comment, '' ]; #ok
549
550=item [0addr]B<#> [I<comment>]
551
552The entire line is ignored (treated as a comment). If, however, the first
c2b27382 553two characters in the script are 'C<#n>', automatic printing of output is
86a59229
WL
554suppressed, as if the B<-n> option were given on the command line.
555
556=back
557
558=cut
559
560use vars qw{ $isEOF $Hold %wFiles @Q $CondReg $doPrint };
561
562my $useDEBUG = exists( $ENV{PSEDDEBUG} );
563my $useEXTBRE = $ENV{PSEDEXTBRE} || '';
564$useEXTBRE =~ s/[^<>wWyB]//g; # gawk RE's handle these
565
566my $doAutoPrint = 1; # automatic printing of pattern space (-n => 0)
567my $doOpenWrite = 1; # open w command output files at start (-a => 0)
568my $svOpenWrite = 0; # save $doOpenWrite
96021e87
JM
569
570# lower case $0 below as a VMSism. The VMS build procedure creates the
571# s2p file traditionally in upper case on the disk. When VMS is in a
572# case preserved or case sensitive mode, $0 will be returned in the exact
573# case which will be on the disk, and that is not predictable at this time.
574
575my $doGenerate = lc($0) eq 's2p';
86a59229
WL
576
577# Collected and compiled script
578#
d16f50bd
WL
579my( @Commands, %Defined, @BlockStack, %Label, $labNum, $Code, $Func );
580$Code = '';
86a59229
WL
581
582##################
583# Compile Time
584#
585# Labels
586#
587# Error handling
588#
589sub Warn($;$){
590 my( $msg, $loc ) = @_;
591 $loc ||= '';
592 $loc .= ': ' if length( $loc );
593 warn( "$0: $loc$msg\n" );
594}
595
596$labNum = 0;
597sub newLabel(){
598 return 'L_'.++$labNum;
599}
600
601# safeHere: create safe here delimiter and modify opcode and argument
602#
603sub safeHere($$){
604 my( $codref, $argref ) = @_;
605 my $eod = 'EOD000';
606 while( $$argref =~ /^$eod$/m ){
607 $eod++;
608 }
609 $$codref =~ s/TheEnd/$eod/e;
610 $$argref .= "$eod\n";
611}
612
613# Emit: create address logic and emit command
614#
615sub Emit($$$$$$){
616 my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
617 my $cond = '';
618 if( defined( $addr1 ) ){
619 if( defined( $addr2 ) ){
620 $addr1 .= $addr2 =~ /^\d+$/ ? "..$addr2" : "...$addr2";
621 } else {
622 $addr1 .= ' == $.' if $addr1 =~ /^\d+$/;
623 }
624 $cond = $negated ? "unless( $addr1 )\n" : "if( $addr1 )\n";
625 }
626
627 if( $opcode eq '' ){
628 $Code .= "$cond$arg\n";
629
630 } elsif( $opcode =~ s/-X-/$arg/e ){
631 $Code .= "$cond$opcode\n";
632
633 } elsif( $opcode =~ /TheEnd/ ){
634 safeHere( \$opcode, \$arg );
635 $Code .= "$cond$opcode$arg";
636
637 } else {
638 $Code .= "$cond$opcode\n";
639 }
640 0;
641}
642
643# Write (w command, w flag): store pathname
644#
645sub Write($$$$$$){
646 my( $addr1, $addr2, $negated, $opcode, $path, $fl ) = @_;
647 $wFiles{$path} = '';
648 Emit( $addr1, $addr2, $negated, $opcode, $path, $fl );
649}
650
651
652# Label (: command): label definition
653#
654sub Label($$$$$$){
655 my( $addr1, $addr2, $negated, $opcode, $lab, $fl ) = @_;
656 my $rc = 0;
657 $lab =~ s/\s+//;
658 if( length( $lab ) ){
659 my $h;
660 if( ! exists( $Label{$lab} ) ){
661 $h = $Label{$lab}{name} = newLabel();
662 } else {
663 $h = $Label{$lab}{name};
664 if( exists( $Label{$lab}{defined} ) ){
665 my $dl = $Label{$lab}{defined};
666 Warn( "duplicate label $lab (first defined at $dl)", $fl );
667 $rc = 1;
668 }
669 }
670 $Label{$lab}{defined} = $fl;
671 $Code .= "$h:;\n";
672 }
673 $rc;
674}
675
676# BeginBlock ({ command): push block start
677#
678sub BeginBlock($$$$$$){
679 my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
680 push( @BlockStack, [ $fl, $addr1, $addr2, $negated ] );
681 Emit( $addr1, $addr2, $negated, $opcode, $arg, $fl );
682}
683
684# EndBlock (} command): check proper nesting
685#
686sub EndBlock($$$$$$){
687 my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
688 my $rc;
689 my $jcom = pop( @BlockStack );
690 if( defined( $jcom ) ){
691 $rc = Emit( $addr1, $addr2, $negated, $opcode, $arg, $fl );
692 } else {
c2b27382 693 Warn( "unexpected '}'", $fl );
86a59229
WL
694 $rc = 1;
695 }
696 $rc;
697}
698
699# Branch (t, b commands): check or create label, substitute default
700#
701sub Branch($$$$$$){
702 my( $addr1, $addr2, $negated, $opcode, $lab, $fl ) = @_;
703 $lab =~ s/\s+//; # no spaces at end
704 my $h;
705 if( length( $lab ) ){
706 if( ! exists( $Label{$lab} ) ){
707 $h = $Label{$lab}{name} = newLabel();
708 } else {
709 $h = $Label{$lab}{name};
710 }
711 push( @{$Label{$lab}{used}}, $fl );
712 } else {
713 $h = 'EOS';
714 }
715 $opcode =~ s/XXX/$h/e;
716 Emit( $addr1, $addr2, $negated, $opcode, '', $fl );
717}
718
719# Change (c command): is special due to range end watching
720#
721sub Change($$$$$$){
722 my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
723 my $kwd = $negated ? 'unless' : 'if';
724 if( defined( $addr2 ) ){
725 $addr1 .= $addr2 =~ /^\d+$/ ? "..$addr2" : "...$addr2";
726 if( ! $negated ){
727 $addr1 = '$icnt = ('.$addr1.')';
728 $opcode = 'if( $icnt =~ /E0$/ )' . $opcode;
729 }
730 } else {
731 $addr1 .= ' == $.' if $addr1 =~ /^\d+$/;
732 }
733 safeHere( \$opcode, \$arg );
734 $Code .= "$kwd( $addr1 ){\n $opcode$arg}\n";
735 0;
736}
737
738
739# Comment (# command): A no-op. Who would've thought that!
740#
741sub Comment($$$$$$){
742 my( $addr1, $addr2, $negated, $opcode, $arg, $fl ) = @_;
743### $Code .= "# $arg\n";
744 0;
745}
746
e2f73067
WL
747# stripRegex from the current command. If we're in the first
748# part of s///, trailing spaces have to be kept as the initial
749# part of the replacement string.
750#
b588327e 751sub stripRegex($$;$){
e2f73067 752 my( $del, $sref, $sub ) = @_;
86a59229
WL
753 my $regex = $del;
754 print "stripRegex:$del:$$sref:\n" if $useDEBUG;
755 while( $$sref =~ s{^(.*?)(\\*)\Q$del\E(\s*)}{}s ){
756 my $sl = $2;
757 $regex .= $1.$sl.$del;
758 if( length( $sl ) % 2 == 0 ){
e2f73067
WL
759 if( $sub && (length( $3 ) > 0) ){
760 $$sref = $3 . $$sref;
761 }
86a59229
WL
762 return $regex;
763 }
764 $regex .= $3;
765 }
766 undef();
767}
768
769# stripTrans: take a <del> terminated string from y command
770# honoring and cleaning up of \-escaped <del>'s
771#
772sub stripTrans($$){
773 my( $del, $sref ) = @_;
774 my $t = '';
775 print "stripTrans:$del:$$sref:\n" if $useDEBUG;
776 while( $$sref =~ s{^(.*?)(\\*)\Q$del\E}{}s ){
777 my $sl = $2;
778 $t .= $1;
779 if( length( $sl ) % 2 == 0 ){
780 $t .= $sl;
781 $t =~ s/\\\\/\\/g;
782 return $t;
783 }
784 chop( $sl );
785 $t .= $sl.$del.$3;
786 }
787 undef();
788}
789
790# makey - construct Perl y/// from sed y///
791#
792sub makey($$$){
793 my( $fr, $to, $fl ) = @_;
794 my $error = 0;
795
796 # Ensure that any '-' is up front.
797 # Diagnose duplicate contradicting mappings
798 my %tr;
799 for( my $i = 0; $i < length($fr); $i++ ){
800 my $fc = substr($fr,$i,1);
801 my $tc = substr($to,$i,1);
802 if( exists( $tr{$fc} ) && $tr{$fc} ne $tc ){
c2b27382 803 Warn( "ambiguous translation for character '$fc' in 'y' command",
86a59229
WL
804 $fl );
805 $error++;
806 }
807 $tr{$fc} = $tc;
808 }
809 $fr = $to = '';
810 if( exists( $tr{'-'} ) ){
811 ( $fr, $to ) = ( '-', $tr{'-'} );
812 delete( $tr{'-'} );
813 } else {
814 $fr = $to = '';
815 }
816 # might just as well sort it...
817 for my $fc ( sort keys( %tr ) ){
818 $fr .= $fc;
819 $to .= $tr{$fc};
820 }
821 # make embedded delimiters and newlines safe
822 $fr =~ s/([{}])/\$1/g;
823 $to =~ s/([{}])/\$1/g;
824 $fr =~ s/\n/\\n/g;
825 $to =~ s/\n/\\n/g;
826 return $error ? undef() : "{ y{$fr}{$to}; }";
827}
828
829######
830# makes - construct Perl s/// from sed s///
831#
832sub makes($$$$$$$){
833 my( $regex, $subst, $path, $global, $print, $nmatch, $fl ) = @_;
834
835 # make embedded newlines safe
836 $regex =~ s/\n/\\n/g;
837 $subst =~ s/\n/\\n/g;
838
839 my $code;
840 # n-th occurrence
841 #
842 if( length( $nmatch ) ){
843 $code = <<TheEnd;
844{ \$n = $nmatch;
845 while( --\$n && ( \$s = m ${regex}g ) ){}
846 \$s = ( substr( \$_, pos() ) =~ s ${regex}${subst}s ) if \$s;
847 \$CondReg ||= \$s;
848TheEnd
849 } else {
850 $code = <<TheEnd;
851{ \$s = s ${regex}${subst}s${global};
852 \$CondReg ||= \$s;
853TheEnd
854 }
855 if( $print ){
856 $code .= ' print $_, "\n" if $s;'."\n";
857 }
858 if( defined( $path ) ){
859 $wFiles{$path} = '';
860 $code .= " _w( '$path' ) if \$s;\n";
d16f50bd 861 $GenKey{'w'} = 1;
86a59229
WL
862 }
863 $code .= "}";
864}
865
866=head1 BASIC REGULAR EXPRESSIONS
867
868A I<Basic Regular Expression> (BRE), as defined in POSIX 1003.2, consists
869of I<atoms>, for matching parts of a string, and I<bounds>, specifying
870repetitions of a preceding atom.
871
872=head2 Atoms
873
874The possible atoms of a BRE are: B<.>, matching any single character;
875B<^> and B<$>, matching the null string at the beginning or end
876of a string, respectively; a I<bracket expressions>, enclosed
877in B<[> and B<]> (see below); and any single character with no
878other significance (matching that character). A B<\> before one
879of: B<.>, B<^>, B<$>, B<[>, B<*>, B<\>, matching the character
880after the backslash. A sequence of atoms enclosed in B<\(> and B<\)>
881becomes an atom and establishes the target for a I<backreference>,
882consisting of the substring that actually matches the enclosed atoms.
883Finally, B<\> followed by one of the digits B<0> through B<9> is a
884backreference.
885
886A B<^> that is not first, or a B<$> that is not last does not have
887a special significance and need not be preceded by a backslash to
888become literal. The same is true for a B<]>, that does not terminate
889a bracket expression.
890
891An unescaped backslash cannot be last in a BRE.
892
893=head2 Bounds
894
895The BRE bounds are: B<*>, specifying 0 or more matches of the preceding
896atom; B<\{>I<count>B<\}>, specifying that many repetitions;
897B<\{>I<minimum>B<,\}>, giving a lower limit; and
898B<\{>I<minimum>B<,>I<maximum>B<\}> finally defines a lower and upper
899bound.
900
901A bound appearing as the first item in a BRE is taken literally.
902
903=head2 Bracket Expressions
904
905A I<bracket expression> is a list of characters, character ranges
906and character classes enclosed in B<[> and B<]> and matches any
907single character from the represented set of characters.
908
909A character range is written as two characters separated by B<-> and
910represents all characters (according to the character collating sequence)
911that are not less than the first and not greater than the second.
912(Ranges are very collating-sequence-dependent, and portable programs
913should avoid relying on them.)
914
915A character class is one of the class names
916
917 alnum digit punct
918 alpha graph space
919 blank lower upper
920 cntrl print xdigit
921
922enclosed in B<[:> and B<:]> and represents the set of characters
923as defined in ctype(3).
924
925If the first character after B<[> is B<^>, the sense of matching is
926inverted.
927
c2b27382 928To include a literal 'C<^>', place it anywhere else but first. To
86a59229 929include a literal 'C<]>' place it first or immediately after an
c2b27382 930initial B<^>. To include a literal 'C<->' make it the first (or
86a59229
WL
931second after B<^>) or last character, or the second endpoint of
932a range.
933
934The special bracket expression constructs C<[[:E<lt>:]]> and C<[[:E<gt>:]]>
935match the null string at the beginning and end of a word respectively.
c2b27382 936(Note that neither is identical to Perl's '\b' atom.)
86a59229
WL
937
938=head2 Additional Atoms
939
940Since some sed implementations provide additional regular expression
941atoms (not defined in POSIX 1003.2), B<psed> is capable of translating
942the following backslash escapes:
943
944=over 4
945
946=item B<\E<lt>> This is the same as C<[[:E<gt>:]]>.
947
948=item B<\E<gt>> This is the same as C<[[:E<lt>:]]>.
949
950=item B<\w> This is an abbreviation for C<[[:alnum:]_]>.
951
952=item B<\W> This is an abbreviation for C<[^[:alnum:]_]>.
953
954=item B<\y> Match the empty string at a word boundary.
955
956=item B<\B> Match the empty string between any two either word or non-word characters.
957
958=back
959
960To enable this feature, the environment variable PSEDEXTBRE must be set
961to a string containing the requested characters, e.g.:
962C<PSEDEXTBRE='E<lt>E<gt>wW'>.
963
964=cut
965
966#####
967# bre2p - convert BRE to Perl RE
968#
969sub peek(\$$){
970 my( $pref, $ic ) = @_;
971 $ic < length($$pref)-1 ? substr( $$pref, $ic+1, 1 ) : '';
972}
973
974sub bre2p($$$){
975 my( $del, $pat, $fl ) = @_;
976 my $led = $del;
977 $led =~ tr/{([</})]>/;
978 $led = '' if $led eq $del;
979
980 $pat = substr( $pat, 1, length($pat) - 2 );
981 my $res = '';
982 my $bracklev = 0;
983 my $backref = 0;
984 my $parlev = 0;
985 for( my $ic = 0; $ic < length( $pat ); $ic++ ){
986 my $c = substr( $pat, $ic, 1 );
987 if( $c eq '\\' ){
988 ### backslash escapes
989 my $nc = peek($pat,$ic);
990 if( $nc eq '' ){
215757ac 991 Warn( "'\\' cannot be last in pattern", $fl );
86a59229
WL
992 return undef();
993 }
994 $ic++;
995 if( $nc eq $del ){ ## \<pattern del> => \<pattern del>
996 $res .= "\\$del";
997
998 } elsif( $nc =~ /([[.*\\n])/ ){
999 ## check for \-escaped magics and \n:
1000 ## \[ \. \* \\ \n stay as they are
1001 $res .= '\\'.$nc;
1002
1003 } elsif( $nc eq '(' ){ ## \( => (
1004 $parlev++;
1005 $res .= '(';
1006
1007 } elsif( $nc eq ')' ){ ## \) => )
1008 $parlev--;
1009 $backref++;
1010 if( $parlev < 0 ){
c2b27382 1011 Warn( "unmatched '\\)'", $fl );
86a59229
WL
1012 return undef();
1013 }
1014 $res .= ')';
1015
1016 } elsif( $nc eq '{' ){ ## repetition factor \{<i>[,[<j>]]\}
1017 my $endpos = index( $pat, '\\}', $ic );
1018 if( $endpos < 0 ){
c2b27382 1019 Warn( "unmatched '\\{'", $fl );
86a59229
WL
1020 return undef();
1021 }
1022 my $rep = substr( $pat, $ic+1, $endpos-($ic+1) );
1023 $ic = $endpos + 1;
1024
1025 if( $res =~ /^\^?$/ ){
1026 $res .= "\\{$rep\}";
1027 } elsif( $rep =~ /^(\d+)(,?)(\d*)?$/ ){
1028 my $min = $1;
1029 my $com = $2 || '';
1030 my $max = $3;
1031 if( length( $max ) ){
1032 if( $max < $min ){
c2b27382 1033 Warn( "maximum less than minimum in '\\{$rep\\}'",
86a59229
WL
1034 $fl );
1035 return undef();
1036 }
1037 } else {
1038 $max = '';
1039 }
1040 # simplify some
1041 if( $min == 0 && $max eq '1' ){
1042 $res .= '?';
1043 } elsif( $min == 1 && "$com$max" eq ',' ){
1044 $res .= '+';
1045 } elsif( $min == 0 && "$com$max" eq ',' ){
1046 $res .= '*';
1047 } else {
1048 $res .= "{$min$com$max}";
1049 }
1050 } else {
c2b27382 1051 Warn( "invalid repeat clause '\\{$rep\\}'", $fl );
86a59229
WL
1052 return undef();
1053 }
1054
1055 } elsif( $nc =~ /^[1-9]$/ ){
1056 ## \1 .. \9 => \1 .. \9, but check for a following digit
1057 if( $nc > $backref ){
1058 Warn( "invalid backreference ($nc)", $fl );
1059 return undef();
1060 }
1061 $res .= "\\$nc";
1062 if( peek($pat,$ic) =~ /[0-9]/ ){
1063 $res .= '(?:)';
1064 }
1065
1066 } elsif( $useEXTBRE && ( $nc =~ /[$useEXTBRE]/ ) ){
1067 ## extensions - at most <>wWyB - not in POSIX
1068 if( $nc eq '<' ){ ## \< => \b(?=\w), be precise
1069 $res .= '\\b(?<=\\W)';
1070 } elsif( $nc eq '>' ){ ## \> => \b(?=\W), be precise
1071 $res .= '\\b(?=\\W)';
1072 } elsif( $nc eq 'y' ){ ## \y => \b
1073 $res .= '\\b';
1074 } else { ## \B, \w, \W remain the same
1075 $res .= "\\$nc";
1076 }
1077 } elsif( $nc eq $led ){
1078 ## \<closing bracketing-delimiter> - keep '\'
1079 $res .= "\\$nc";
1080
c2b27382 1081 } else { ## \ <char> => <char> ("as if '\' were not present")
86a59229
WL
1082 $res .= $nc;
1083 }
1084
1085 } elsif( $c eq '.' ){ ## . => .
1086 $res .= $c;
1087
1088 } elsif( $c eq '*' ){ ## * => * but \* if there's nothing preceding it
1089 if( $res =~ /^\^?$/ ){
1090 $res .= '\\*';
1091 } elsif( substr( $res, -1, 1 ) ne '*' ){
1092 $res .= $c;
1093 }
1094
1095 } elsif( $c eq '[' ){
1096 ## parse []: [^...] [^]...] [-...]
1097 my $add = '[';
1098 if( peek($pat,$ic) eq '^' ){
1099 $ic++;
1100 $add .= '^';
1101 }
1102 my $nc = peek($pat,$ic);
1103 if( $nc eq ']' || $nc eq '-' ){
1104 $add .= $nc;
1105 $ic++;
1106 }
1107 # check that [ is not trailing
1108 if( $ic >= length( $pat ) - 1 ){
c2b27382 1109 Warn( "unmatched '['", $fl );
86a59229
WL
1110 return undef();
1111 }
1112 # look for [:...:] and x-y
1113 my $rstr = substr( $pat, $ic+1 );
1114 if( $rstr =~ /^((?:\[:\(\w+|[><]\):\]|[^]-](?:-[^]])?)*)/ ){
1115 my $cnt = $1;
1116 $ic += length( $cnt );
c2b27382 1117 $cnt =~ s/([\\\$])/\\$1/g; # '\', '$' are magic in Perl []
86a59229
WL
1118 # try some simplifications
1119 my $red = $cnt;
1120 if( $red =~ s/0-9// ){
1121 $cnt = $red.'\d';
1122 if( $red =~ s/A-Z// && $red =~ s/a-z// && $red =~ s/_// ){
1123 $cnt = $red.'\w';
1124 }
1125 }
1126 $add .= $cnt;
1127
1128 # POSIX 1003.2 has this (optional) for begin/end word
1129 $add = '\\b(?=\\W)' if $add eq '[[:<:]]';
1130 $add = '\\b(?<=\\W)' if $add eq '[[:>:]]';
1131
1132 }
1133
c2b27382 1134 ## may have a trailing '-' before ']'
86a59229
WL
1135 if( $ic < length($pat) - 1 &&
1136 substr( $pat, $ic+1 ) =~ /^(-?])/ ){
1137 $ic += length( $1 );
1138 $add .= $1;
1139 # another simplification
1140 $add =~ s/^\[(\^?)(\\[dw])]$/ $1 eq '^' ? uc($2) : $2 /e;
1141 $res .= $add;
1142 } else {
c2b27382 1143 Warn( "unmatched '['", $fl );
86a59229
WL
1144 return undef();
1145 }
3cb6de81 1146
86a59229
WL
1147 } elsif( $c eq $led ){ ## unescaped <closing bracketing-delimiter>
1148 $res .= "\\$c";
d83e3bda 1149
86a59229
WL
1150 } elsif( $c eq ']' ){ ## unmatched ] is not magic
1151 $res .= ']';
d83e3bda 1152
86a59229
WL
1153 } elsif( $c =~ /[|+?{}()]/ ){ ## not magic in BRE, but in Perl: \-quote
1154 $res .= "\\$c";
d83e3bda 1155
86a59229
WL
1156 } elsif( $c eq '^' ){ ## not magic unless 1st, but in Perl: \-quote
1157 $res .= length( $res ) ? '\\^' : '^';
d83e3bda 1158
86a59229
WL
1159 } elsif( $c eq '$' ){ ## not magic unless last, but in Perl: \-quote
1160 $res .= $ic == length( $pat ) - 1 ? '$' : '\\$';
8d063cd8 1161
86a59229
WL
1162 } else {
1163 $res .= $c;
1164 }
8d063cd8 1165 }
0a12ae7d 1166
86a59229 1167 if( $parlev ){
c2b27382 1168 Warn( "unmatched '\\('", $fl );
86a59229 1169 return undef();
8d063cd8 1170 }
0a12ae7d 1171
86a59229
WL
1172 # final cleanup: eliminate raw HTs
1173 $res =~ s/\t/\\t/g;
1174 return $del . $res . ( $led ? $led : $del );
1175}
0a12ae7d 1176
86a59229
WL
1177
1178#####
1179# sub2p - convert sed substitution to Perl substitution
1180#
1181sub sub2p($$$){
1182 my( $del, $subst, $fl ) = @_;
1183 my $led = $del;
1184 $led =~ tr/{([</})]>/;
1185 $led = '' if $led eq $del;
1186
1187 $subst = substr( $subst, 1, length($subst) - 2 );
1188 my $res = '';
1189
1190 for( my $ic = 0; $ic < length( $subst ); $ic++ ){
1191 my $c = substr( $subst, $ic, 1 );
1192 if( $c eq '\\' ){
1193 ### backslash escapes
1194 my $nc = peek($subst,$ic);
1195 if( $nc eq '' ){
215757ac 1196 Warn( "'\\' cannot be last in substitution", $fl );
86a59229
WL
1197 return undef();
1198 }
1199 $ic++;
1200 if( $nc =~ /[\\$del$led]/ ){ ## \ and delimiter
1201 $res .= '\\' . $nc;
1202 } elsif( $nc =~ /[1-9]/ ){ ## \1 - \9 => ${1} - ${9}
1203 $res .= '${' . $nc . '}';
1204 } else { ## everything else (includes &): omit \
1205 $res .= $nc;
1206 }
1207 } elsif( $c eq '&' ){ ## & => $&
1208 $res .= '$&';
1209 } elsif( $c =~ /[\$\@$led]/ ){ ## magic in Perl's substitution string
1210 $res .= '\\' . $c;
1211 } else {
1212 $res .= $c;
8d063cd8 1213 }
8d063cd8
LW
1214 }
1215
86a59229
WL
1216 # final cleanup: eliminate raw HTs
1217 $res =~ s/\t/\\t/g;
1218 return ( $led ? $del : $led ) . $res . ( $led ? $led : $del );
1219}
0a12ae7d 1220
86a59229
WL
1221
1222sub Parse(){
1223 my $error = 0;
1224 my( $pdef, $pfil, $plin );
1225 for( my $icom = 0; $icom < @Commands; $icom++ ){
1226 my $cmd = $Commands[$icom];
1227 print "Parse:$cmd:\n" if $useDEBUG;
1228 $cmd =~ s/^\s+//;
1229 next unless length( $cmd );
1230 my $scom = $icom;
1231 if( exists( $Defined{$icom} ) ){
1232 $pdef = $Defined{$icom};
1233 if( $pdef =~ /^ #(\d+)/ ){
1234 $pfil = 'expression #';
1235 $plin = $1;
1236 } else {
1237 $pfil = "$pdef l.";
1238 $plin = 1;
1239 }
1240 } else {
1241 $plin++;
1242 }
1243 my $fl = "$pfil$plin";
1244
1245 # insert command as comment in gnerated code
1246 #
1247 $Code .= "# $cmd\n" if $doGenerate;
1248
1249 # The Address(es)
1250 #
1251 my( $negated, $naddr, $addr1, $addr2 );
1252 $naddr = 0;
1253 if( $cmd =~ s/^(\d+)\s*// ){
1254 $addr1 = "$1"; $naddr++;
1255 } elsif( $cmd =~ s/^\$\s*// ){
1256 $addr1 = 'eofARGV()'; $naddr++;
1257 } elsif( $cmd =~ s{^(/)}{} || $cmd =~ s{^\\(.)}{} ){
1258 my $del = $1;
1259 my $regex = stripRegex( $del, \$cmd );
1260 if( defined( $regex ) ){
1261 $addr1 = 'm '.bre2p( $del, $regex, $fl ).'s';
1262 $naddr++;
1263 } else {
1264 Warn( "malformed regex, 1st address", $fl );
1265 $error++;
1266 next;
1267 }
1268 }
1269 if( defined( $addr1 ) && $cmd =~ s/,\s*// ){
1270 if( $cmd =~ s/^(\d+)\s*// ){
1271 $addr2 = "$1"; $naddr++;
1272 } elsif( $cmd =~ s/^\$\s*// ){
1273 $addr2 = 'eofARGV()'; $naddr++;
1274 } elsif( $cmd =~ s{^(/)}{} || $cmd =~ s{^\\(.)}{} ){
1275 my $del = $1;
1276 my $regex = stripRegex( $del, \$cmd );
1277 if( defined( $regex ) ){
1278 $addr2 = 'm '. bre2p( $del, $regex, $fl ).'s';
1279 $naddr++;
1280 } else {
1281 Warn( "malformed regex, 2nd address", $fl );
1282 $error++;
1283 next;
1284 }
1285 } else {
c2b27382 1286 Warn( "invalid address after ','", $fl );
86a59229
WL
1287 $error++;
1288 next;
1289 }
1290 }
1291
c2b27382 1292 # address modifier '!'
86a59229
WL
1293 #
1294 $negated = $cmd =~ s/^!\s*//;
1295 if( defined( $addr1 ) ){
1296 print "Parse: addr1=$addr1" if $useDEBUG;
1297 if( defined( $addr2 ) ){
1298 print ", addr2=$addr2 " if $useDEBUG;
1299 # both numeric and addr1 > addr2 => eliminate addr2
1300 undef( $addr2 ) if $addr1 =~ /^\d+$/ &&
1301 $addr2 =~ /^\d+$/ && $addr1 > $addr2;
1302 }
9ef589d8 1303 }
86a59229
WL
1304 print 'negated' if $useDEBUG && $negated;
1305 print " command:$cmd\n" if $useDEBUG;
1306
1307 # The Command
1308 #
1309 if( $cmd !~ s/^([:#={}abcdDgGhHilnNpPqrstwxy])\s*// ){
1310 my $h = substr( $cmd, 0, 1 );
c2b27382 1311 Warn( "unknown command '$h'", $fl );
86a59229 1312 $error++;
8d063cd8
LW
1313 next;
1314 }
86a59229 1315 my $key = $1;
8d063cd8 1316
86a59229 1317 my $tabref = $ComTab{$key};
d16f50bd 1318 $GenKey{$key} = 1;
86a59229
WL
1319 if( $naddr > $tabref->[0] ){
1320 Warn( "excess address(es)", $fl );
1321 $error++;
8d063cd8
LW
1322 next;
1323 }
1324
86a59229
WL
1325 my $arg = '';
1326 if( $tabref->[1] eq 'str' ){
1327 # take remainder - don't care if it is empty
1328 $arg = $cmd;
1329 $cmd = '';
1330
1331 } elsif( $tabref->[1] eq 'txt' ){
1332 # multi-line text
1333 my $goon = $cmd =~ /(.*)\\$/;
1334 if( length( $1 ) ){
1335 Warn( "extra characters after command ($cmd)", $fl );
1336 $error++;
8d063cd8 1337 }
86a59229
WL
1338 while( $goon ){
1339 $icom++;
1340 if( $icom > $#Commands ){
1341 Warn( "unexpected end of script", $fl );
1342 $error++;
1343 last;
1344 }
1345 $cmd = $Commands[$icom];
1346 $Code .= "# $cmd\n" if $doGenerate;
1347 $goon = $cmd =~ s/\\$//;
1348 $cmd =~ s/\\(.)/$1/g;
1349 $arg .= "\n" if length( $arg );
1350 $arg .= $cmd;
8d063cd8 1351 }
86a59229
WL
1352 $arg .= "\n" if length( $arg );
1353 $cmd = '';
8d063cd8 1354
86a59229
WL
1355 } elsif( $tabref->[1] eq 'sub' ){
1356 # s///
1357 if( ! length( $cmd ) ){
215757ac 1358 Warn( "'s' command requires argument", $fl );
86a59229
WL
1359 $error++;
1360 next;
1361 }
1362 if( $cmd =~ s{^([^\\\n])}{} ){
1363 my $del = $1;
e2f73067 1364 my $regex = stripRegex( $del, \$cmd, "s" );
86a59229
WL
1365 if( ! defined( $regex ) ){
1366 Warn( "malformed regular expression", $fl );
1367 $error++;
1368 next;
a687059c 1369 }
86a59229
WL
1370 $regex = bre2p( $del, $regex, $fl );
1371
1372 # a trailing \ indicates embedded NL (in replacement string)
1373 while( $cmd =~ s/(?<!\\)\\$/\n/ ){
1374 $icom++;
1375 if( $icom > $#Commands ){
1376 Warn( "unexpected end of script", $fl );
1377 $error++;
1378 last;
8d063cd8 1379 }
86a59229
WL
1380 $cmd .= $Commands[$icom];
1381 $Code .= "# $Commands[$icom]\n" if $doGenerate;
9ef589d8 1382 }
86a59229
WL
1383
1384 my $subst = stripRegex( $del, \$cmd );
1385 if( ! defined( $regex ) ){
1386 Warn( "malformed substitution expression", $fl );
1387 $error++;
0a12ae7d
LW
1388 next;
1389 }
86a59229
WL
1390 $subst = sub2p( $del, $subst, $fl );
1391
1392 # parse s/// modifier: g|p|0-9|w <file>
1393 my( $global, $nmatch, $print, $write ) =
1394 ( '', '', 0, undef );
1395 while( $cmd =~ s/^([gp0-9])// ){
1396 $1 eq 'g' ? ( $global = 'g' ) :
1397 $1 eq 'p' ? ( $print = $1 ) : ( $nmatch .= $1 );
1398 }
1399 $write = $1 if $cmd =~ s/w\s*(.*)$//;
1400 ### $nmatch =~ s/^(\d)\1*$/$1/; ### may be dangerous?
1401 if( $global && length( $nmatch ) || length( $nmatch ) > 1 ){
c2b27382 1402 Warn( "conflicting flags '$global$nmatch'", $fl );
86a59229 1403 $error++;
0a12ae7d
LW
1404 next;
1405 }
86a59229
WL
1406
1407 $arg = makes( $regex, $subst,
1408 $write, $global, $print, $nmatch, $fl );
1409 if( ! defined( $arg ) ){
1410 $error++;
8d063cd8
LW
1411 next;
1412 }
86a59229
WL
1413
1414 } else {
1415 Warn( "improper delimiter in s command", $fl );
1416 $error++;
1417 next;
1418 }
1419
1420 } elsif( $tabref->[1] eq 'tra' ){
1421 # y///
1422 # a trailing \ indicates embedded newline
1423 while( $cmd =~ s/(?<!\\)\\$/\n/ ){
1424 $icom++;
1425 if( $icom > $#Commands ){
1426 Warn( "unexpected end of script", $fl );
1427 $error++;
1428 last;
1429 }
1430 $cmd .= $Commands[$icom];
1431 $Code .= "# $Commands[$icom]\n" if $doGenerate;
1432 }
1433 if( ! length( $cmd ) ){
215757ac 1434 Warn( "'y' command requires argument", $fl );
86a59229
WL
1435 $error++;
1436 next;
1437 }
1438 my $d = substr( $cmd, 0, 1 ); $cmd = substr( $cmd, 1 );
1439 if( $d eq '\\' ){
215757ac 1440 Warn( "'\\' not valid as delimiter in 'y' command", $fl );
86a59229
WL
1441 $error++;
1442 next;
1443 }
1444 my $fr = stripTrans( $d, \$cmd );
1445 if( ! defined( $fr ) || ! length( $cmd ) ){
c2b27382 1446 Warn( "malformed 'y' command argument", $fl );
86a59229
WL
1447 $error++;
1448 next;
1449 }
1450 my $to = stripTrans( $d, \$cmd );
1451 if( ! defined( $to ) ){
c2b27382 1452 Warn( "malformed 'y' command argument", $fl );
86a59229
WL
1453 $error++;
1454 next;
1455 }
1456 if( length($fr) != length($to) ){
c2b27382 1457 Warn( "string lengths in 'y' command differ", $fl );
86a59229
WL
1458 $error++;
1459 next;
1460 }
1461 if( ! defined( $arg = makey( $fr, $to, $fl ) ) ){
1462 $error++;
1463 next;
8d063cd8 1464 }
8d063cd8 1465
8d063cd8
LW
1466 }
1467
86a59229
WL
1468 # $cmd must be now empty - exception is {
1469 if( $cmd !~ /^\s*$/ ){
1470 if( $key eq '{' ){
1471 # dirty hack to process command on '{' line
1472 $Commands[$icom--] = $cmd;
1473 } else {
1474 Warn( "extra characters after command ($cmd)", $fl );
1475 $error++;
1476 next;
1477 }
8d063cd8
LW
1478 }
1479
86a59229
WL
1480 # Make Code
1481 #
1482 if( &{$tabref->[2]}( $addr1, $addr2, $negated,
1483 $tabref->[3], $arg, $fl ) ){
1484 $error++;
8d063cd8 1485 }
86a59229 1486 }
8d063cd8 1487
86a59229
WL
1488 while( @BlockStack ){
1489 my $bl = pop( @BlockStack );
c2b27382 1490 Warn( "start of unterminated '{'", $bl );
86a59229
WL
1491 $error++;
1492 }
8d063cd8 1493
86a59229
WL
1494 for my $lab ( keys( %Label ) ){
1495 if( ! exists( $Label{$lab}{defined} ) ){
1496 for my $used ( @{$Label{$lab}{used}} ){
c2b27382 1497 Warn( "undefined label '$lab'", $used );
86a59229
WL
1498 $error++;
1499 }
8d063cd8 1500 }
86a59229 1501 }
8d063cd8 1502
86a59229
WL
1503 exit( 1 ) if $error;
1504}
8d063cd8 1505
8d063cd8 1506
86a59229
WL
1507##############
1508#### MAIN ####
1509##############
8d063cd8 1510
86a59229
WL
1511sub usage(){
1512 print STDERR "Usage: sed [-an] command [file...]\n";
1513 print STDERR " [-an] [-e command] [-f script-file] [file...]\n";
1514}
8d063cd8 1515
86a59229
WL
1516###################
1517# Here we go again...
1518#
1519my $expr = 0;
1520while( @ARGV && $ARGV[0] =~ /^-(.)(.*)$/ ){
1521 my $opt = $1;
1522 my $arg = $2;
1523 shift( @ARGV );
1524 if( $opt eq 'e' ){
1525 if( length( $arg ) ){
1526 push( @Commands, split( "\n", $arg ) );
1527 } elsif( @ARGV ){
1528 push( @Commands, shift( @ARGV ) );
1529 } else {
1530 Warn( "option -e requires an argument" );
1531 usage();
1532 exit( 1 );
1533 }
1534 $expr++;
1535 $Defined{$#Commands} = " #$expr";
1536 next;
1537 }
1538 if( $opt eq 'f' ){
1539 my $path;
1540 if( length( $arg ) ){
1541 $path = $arg;
1542 } elsif( @ARGV ){
1543 $path = shift( @ARGV );
1544 } else {
1545 Warn( "option -f requires an argument" );
1546 usage();
1547 exit( 1 );
1548 }
1549 my $fst = $#Commands + 1;
1550 open( SCRIPT, "<$path" ) || die( "$0: $path: could not open ($!)\n" );
1551 my $cmd;
1552 while( defined( $cmd = <SCRIPT> ) ){
1553 chomp( $cmd );
1554 push( @Commands, $cmd );
1555 }
1556 close( SCRIPT );
1557 if( $#Commands >= $fst ){
1558 $Defined{$fst} = "$path";
8d063cd8 1559 }
86a59229
WL
1560 next;
1561 }
1562 if( $opt eq '-' && $arg eq '' ){
1563 last;
1564 }
1565 if( $opt eq 'h' || $opt eq '?' ){
1566 usage();
1567 exit( 0 );
1568 }
1569 if( $opt eq 'n' ){
1570 $doAutoPrint = 0;
1571 } elsif( $opt eq 'a' ){
1572 $doOpenWrite = 0;
1573 } else {
c2b27382 1574 Warn( "illegal option '$opt'" );
86a59229
WL
1575 usage();
1576 exit( 1 );
1577 }
1578 if( length( $arg ) ){
1579 unshift( @ARGV, "-$arg" );
1580 }
1581}
8d063cd8 1582
86a59229
WL
1583# A singleton command may be the 1st argument when there are no options.
1584#
1585if( @Commands == 0 ){
1586 if( @ARGV == 0 ){
1587 Warn( "no script command given" );
1588 usage();
1589 exit( 1 );
1590 }
1591 push( @Commands, split( "\n", shift( @ARGV ) ) );
1592 $Defined{0} = ' #1';
1593}
8d063cd8 1594
86a59229 1595print STDERR "Files: @ARGV\n" if $useDEBUG;
8d063cd8 1596
86a59229
WL
1597# generate leading code
1598#
d16f50bd 1599$Func = <<'[TheEnd]';
86a59229 1600
d16f50bd
WL
1601# openARGV: open 1st input file
1602#
86a59229
WL
1603sub openARGV(){
1604 unshift( @ARGV, '-' ) unless @ARGV;
1605 my $file = shift( @ARGV );
1606 open( ARG, "<$file" )
1607 || die( "$0: can't open $file for reading ($!)\n" );
1608 $isEOF = 0;
1609}
8d063cd8 1610
d16f50bd
WL
1611# getsARGV: Read another input line into argument (default: $_).
1612# Move on to next input file, and reset EOF flag $isEOF.
86a59229
WL
1613sub getsARGV(;\$){
1614 my $argref = @_ ? shift() : \$_;
1615 while( $isEOF || ! defined( $$argref = <ARG> ) ){
1616 close( ARG );
1617 return 0 unless @ARGV;
1618 my $file = shift( @ARGV );
1619 open( ARG, "<$file" )
1620 || die( "$0: can't open $file for reading ($!)\n" );
1621 $isEOF = 0;
1622 }
1623 1;
1624}
8d063cd8 1625
d16f50bd
WL
1626# eofARGV: end-of-file test
1627#
86a59229
WL
1628sub eofARGV(){
1629 return @ARGV == 0 && ( $isEOF = eof( ARG ) );
1630}
8d063cd8 1631
d16f50bd
WL
1632# makeHandle: Generates another file handle for some file (given by its path)
1633# to be written due to a w command or an s command's w flag.
86a59229
WL
1634sub makeHandle($){
1635 my( $path ) = @_;
1636 my $handle;
1637 if( ! exists( $wFiles{$path} ) || $wFiles{$path} eq '' ){
1638 $handle = $wFiles{$path} = gensym();
1639 if( $doOpenWrite ){
1640 if( ! open( $handle, ">$path" ) ){
1641 die( "$0: can't open $path for writing: ($!)\n" );
1642 }
9ef589d8 1643 }
86a59229
WL
1644 } else {
1645 $handle = $wFiles{$path};
1646 }
1647 return $handle;
1648}
9ef589d8 1649
d16f50bd
WL
1650# printQ: Print queued output which is either a string or a reference
1651# to a pathname.
86a59229
WL
1652sub printQ(){
1653 for my $q ( @Q ){
1654 if( ref( $q ) ){
d16f50bd 1655 # flush open w files so that reading this file gets it all
86a59229
WL
1656 if( exists( $wFiles{$$q} ) && $wFiles{$$q} ne '' ){
1657 open( $wFiles{$$q}, ">>$$q" );
1658 }
d16f50bd 1659 # copy file to stdout: slow, but safe
86a59229 1660 if( open( RF, "<$$q" ) ){
d16f50bd 1661 while( defined( my $line = <RF> ) ){
86a59229
WL
1662 print $line;
1663 }
1664 close( RF );
1665 }
1666 } else {
1667 print $q;
a687059c 1668 }
86a59229
WL
1669 }
1670 undef( @Q );
1671}
1672
d16f50bd
WL
1673[TheEnd]
1674
1675# generate the sed loop
1676#
1677$Code .= <<'[TheEnd]';
1678sub openARGV();
1679sub getsARGV(;\$);
1680sub eofARGV();
1681sub printQ();
1682
1683# Run: the sed loop reading input and applying the script
1684#
86a59229
WL
1685sub Run(){
1686 my( $h, $icnt, $s, $n );
1687 # hack (not unbreakable :-/) to avoid // matching an empty string
1688 my $z = "\000"; $z =~ /$z/;
1689 # Initialize.
1690 openARGV();
1691 $Hold = '';
1692 $CondReg = 0;
1693 $doPrint = $doAutoPrint;
1694CYCLE:
1695 while( getsARGV() ){
1696 chomp();
1697 $CondReg = 0; # cleared on t
1698BOS:;
1699[TheEnd]
1700
1701 # parse - avoid opening files when doing s2p
1702 #
1703 ( $svOpenWrite, $doOpenWrite ) = ( $doOpenWrite, $svOpenWrite )
1704 if $doGenerate;
1705 Parse();
1706 ( $svOpenWrite, $doOpenWrite ) = ( $doOpenWrite, $svOpenWrite )
1707 if $doGenerate;
1708
1709 # append trailing code
1710 #
1711 $Code .= <<'[TheEnd]';
1712EOS: if( $doPrint ){
1713 print $_, "\n";
1714 } else {
1715 $doPrint = $doAutoPrint;
a687059c 1716 }
86a59229 1717 printQ() if @Q;
a687059c 1718 }
86a59229
WL
1719
1720 exit( 0 );
a687059c 1721}
86a59229
WL
1722[TheEnd]
1723
d16f50bd
WL
1724
1725# append optional functions, prepend prototypes
1726#
1727my $Proto = "# prototypes\n";
1728if( $GenKey{'l'} ){
1729 $Proto .= "sub _l();\n";
1730 $Func .= <<'[TheEnd]';
1731# _l: l command processing
1732#
1733sub _l(){
1734 my $h = $_;
1735 my $mcpl = 70;
1736 # transform non printing chars into escape notation
1737 $h =~ s/\\/\\\\/g;
1738 if( $h =~ /[^[:print:]]/ ){
1739 $h =~ s/\a/\\a/g;
1740 $h =~ s/\f/\\f/g;
1741 $h =~ s/\n/\\n/g;
1742 $h =~ s/\t/\\t/g;
1743 $h =~ s/\r/\\r/g;
1744 $h =~ s/\e/\\e/g;
1745 $h =~ s/([^[:print:]])/sprintf("\\%03o", ord($1))/ge;
1746 }
1747 # split into lines of length $mcpl
1748 while( length( $h ) > $mcpl ){
1749 my $l = substr( $h, 0, $mcpl-1 );
1750 $h = substr( $h, $mcpl );
1751 # remove incomplete \-escape from end of line
1752 if( $l =~ s/(?<!\\)(\\[0-7]{0,2})$// ){
1753 $h = $1 . $h;
1754 }
1755 print $l, "\\\n";
1756 }
1757 print "$h\$\n";
1758}
1759
1760[TheEnd]
1761}
1762
1763if( $GenKey{'r'} ){
1764 $Proto .= "sub _r(\$);\n";
1765 $Func .= <<'[TheEnd]';
1766# _r: r command processing: Save a reference to the pathname.
1767#
1768sub _r($){
1769 my $path = shift();
1770 push( @Q, \$path );
1771}
1772
1773[TheEnd]
1774}
1775
1776if( $GenKey{'t'} ){
1777 $Proto .= "sub _t();\n";
1778 $Func .= <<'[TheEnd]';
1779# _t: t command - condition register test/reset
1780#
1781sub _t(){
1782 my $res = $CondReg;
1783 $CondReg = 0;
1784 $res;
1785}
1786
1787[TheEnd]
1788}
1789
1790if( $GenKey{'w'} ){
1791 $Proto .= "sub _w(\$);\n";
1792 $Func .= <<'[TheEnd]';
1793# _w: w command and s command's w flag - write to file
1794#
1795sub _w($){
1796 my $path = shift();
1797 my $handle = $wFiles{$path};
1798 if( ! $doOpenWrite && ! defined( fileno( $handle ) ) ){
1799 open( $handle, ">$path" )
1800 || die( "$0: $path: cannot open ($!)\n" );
1801 }
1802 print $handle $_, "\n";
1803}
1804
1805[TheEnd]
1806}
1807
1808$Code = $Proto . $Code;
1809
86a59229
WL
1810# magic "#n" - same as -n option
1811#
1812$doAutoPrint = 0 if substr( $Commands[0], 0, 2 ) eq '#n';
a687059c 1813
86a59229
WL
1814# eval code - check for errors
1815#
d16f50bd
WL
1816print "Code:\n$Code$Func" if $useDEBUG;
1817eval $Code . $Func;
86a59229 1818if( $@ ){
d16f50bd 1819 print "Code:\n$Code$Func";
86a59229 1820 die( "$0: internal error - generated incorrect Perl code: $@\n" );
9ef589d8
LW
1821}
1822
86a59229
WL
1823if( $doGenerate ){
1824
1825 # write full Perl program
1826 #
1827
d16f50bd 1828 # bang line, declarations, prototypes
86a59229
WL
1829 print <<TheEnd;
1830#!$perlpath -w
1831eval 'exec $perlpath -S \$0 \${1+"\$@"}'
1832 if 0;
9ae2b9a0 1833\$0 =~ s/^.*?(\\w+)\[\\.\\w+\]*\$/\$1/;
86a59229
WL
1834
1835use strict;
1836use Symbol;
1837use vars qw{ \$isEOF \$Hold \%wFiles \@Q \$CondReg
1838 \$doAutoPrint \$doOpenWrite \$doPrint };
1839\$doAutoPrint = $doAutoPrint;
1840\$doOpenWrite = $doOpenWrite;
1841TheEnd
1842
1843 my $wf = "'" . join( "', '", keys( %wFiles ) ) . "'";
1844 if( $wf ne "''" ){
1845 print <<TheEnd;
1846sub makeHandle(\$);
1847for my \$p ( $wf ){
1848 exit( 1 ) unless makeHandle( \$p );
9ef589d8 1849}
86a59229
WL
1850TheEnd
1851 }
9ef589d8 1852
86a59229 1853 print $Code;
d16f50bd
WL
1854 print "Run();\n";
1855 print $Func;
86a59229 1856 exit( 0 );
1aa91729 1857
86a59229
WL
1858} else {
1859
1860 # execute: make handles (and optionally open) all w files; run!
86a59229
WL
1861 for my $p ( keys( %wFiles ) ){
1862 exit( 1 ) unless makeHandle( $p );
1863 }
d16f50bd 1864 Run();
1aa91729 1865}
86a59229
WL
1866
1867
1868=head1 ENVIRONMENT
1869
1870The environment variable C<PSEDEXTBRE> may be set to extend BREs.
1871See L<"Additional Atoms">.
1872
1873=head1 DIAGNOSTICS
1874
1875=over 4
1876
c2b27382 1877=item ambiguous translation for character '%s' in 'y' command
86a59229
WL
1878
1879The indicated character appears twice, with different translations.
1880
c2b27382 1881=item '[' cannot be last in pattern
86a59229 1882
c2b27382 1883A '[' in a BRE indicates the beginning of a I<bracket expression>.
86a59229 1884
c2b27382 1885=item '\' cannot be last in pattern
86a59229 1886
c2b27382 1887A '\' in a BRE is used to make the subsequent character literal.
86a59229 1888
c2b27382 1889=item '\' cannot be last in substitution
86a59229 1890
c2b27382 1891A '\' in a substitution string is used to make the subsequent character literal.
86a59229 1892
c2b27382 1893=item conflicting flags '%s'
86a59229 1894
c2b27382 1895In an B<s> command, either the 'g' flag and an n-th occurrence flag, or
86a59229 1896multiple n-th occurrence flags are specified. Note that only the digits
c2b27382 1897^'1' through '9' are permitted.
86a59229
WL
1898
1899=item duplicate label %s (first defined at %s)
1900
1901=item excess address(es)
1902
1903The command has more than the permitted number of addresses.
1904
1905=item extra characters after command (%s)
1906
c2b27382 1907=item illegal option '%s'
86a59229
WL
1908
1909=item improper delimiter in s command
1910
c2b27382 1911The BRE and substitution may not be delimited with '\' or newline.
86a59229 1912
c2b27382 1913=item invalid address after ','
86a59229
WL
1914
1915=item invalid backreference (%s)
1916
1917The specified backreference number exceeds the number of backreferences
1918in the BRE.
1919
c2b27382 1920=item invalid repeat clause '\{%s\}'
86a59229
WL
1921
1922The repeat clause does not contain a valid integer value, or pair of
1923values.
1924
1925=item malformed regex, 1st address
1926
1927=item malformed regex, 2nd address
1928
1929=item malformed regular expression
1930
1931=item malformed substitution expression
1932
c2b27382 1933=item malformed 'y' command argument
86a59229
WL
1934
1935The first or second string of a B<y> command is syntactically incorrect.
1936
c2b27382 1937=item maximum less than minimum in '\{%s\}'
86a59229
WL
1938
1939=item no script command given
1940
1941There must be at least one B<-e> or one B<-f> option specifying a
1942script or script file.
1943
c2b27382 1944=item '\' not valid as delimiter in 'y' command
86a59229
WL
1945
1946=item option -e requires an argument
1947
1948=item option -f requires an argument
1949
c2b27382 1950=item 's' command requires argument
86a59229 1951
c2b27382 1952=item start of unterminated '{'
86a59229 1953
c2b27382 1954=item string lengths in 'y' command differ
86a59229 1955
863b2ca0 1956The translation table strings in a B<y> command must have equal lengths.
86a59229 1957
c2b27382 1958=item undefined label '%s'
86a59229 1959
c2b27382 1960=item unexpected '}'
86a59229
WL
1961
1962A B<}> command without a preceding B<{> command was encountered.
1963
1964=item unexpected end of script
1965
1966The end of the script was reached although a text line after a
1967B<a>, B<c> or B<i> command indicated another line.
1968
c2b27382 1969=item unknown command '%s'
86a59229 1970
c2b27382 1971=item unterminated '['
86a59229
WL
1972
1973A BRE contains an unterminated bracket expression.
1974
c2b27382 1975=item unterminated '\('
86a59229
WL
1976
1977A BRE contains an unterminated backreference.
1978
c2b27382 1979=item '\{' without closing '\}'
86a59229
WL
1980
1981A BRE contains an unterminated bounds specification.
1982
c2b27382 1983=item '\)' without preceding '\('
86a59229 1984
c2b27382 1985=item 'y' command requires argument
86a59229
WL
1986
1987=back
1988
1989=head1 EXAMPLE
1990
1991The basic material for the preceding section was generated by running
1992the sed script
1993
1994 #no autoprint
1995 s/^.*Warn( *"\([^"]*\)".*$/\1/
1996 t process
1997 b
1998 :process
1999 s/$!/%s/g
2000 s/$[_[:alnum:]]\{1,\}/%s/g
2001 s/\\\\/\\/g
2002 s/^/=item /
2003 p
2004
2005on the program's own text, and piping the output into C<sort -u>.
2006
2007
2008=head1 SED SCRIPT TRANSLATION
2009
2010If this program is invoked with the name F<s2p> it will act as a
2011sed-to-Perl translator. After option processing (all other
2012arguments are ignored), a Perl program is printed on standard
2013output, which will process the input stream (as read from all
2014arguments) in the way defined by the sed script and the option setting
2015used for the translation.
2016
2017=head1 SEE ALSO
2018
2019perl(1), re_format(7)
2020
2021=head1 BUGS
2022
c2b27382 2023The B<l> command will show escape characters (ESC) as 'C<\e>', but
86a59229
WL
2024a vertical tab (VT) in octal.
2025
2026Trailing spaces are truncated from labels in B<:>, B<t> and B<b> commands.
2027
c2b27382 2028The meaning of an empty regular expression ('C<//>'), as defined by B<sed>,
86a59229
WL
2029is "the last pattern used, at run time". This deviates from the Perl
2030interpretation, which will re-use the "last last successfully executed
2031regular expression". Since keeping track of pattern usage would create
2032terribly cluttered code, and differences would only appear in obscure
2033context (where other B<sed> implementations appear to deviate, too),
2034the Perl semantics was adopted. Note that common usage of this feature,
2035such as in C</abc/s//xyz/>, will work as expected.
2036
2037Collating elements (of bracket expressions in BREs) are not implemented.
2038
2039=head1 STANDARDS
2040
2041This B<sed> implementation conforms to the IEEE Std1003.2-1992 ("POSIX.2")
2042definition of B<sed>, and is compatible with the I<OpenBSD>
2043implementation, except where otherwise noted (see L<"BUGS">).
2044
2045=head1 AUTHOR
2046
2047This Perl implementation of I<sed> was written by Wolfgang Laun,
2048I<Wolfgang.Laun@alcatel.at>.
2049
2050=head1 COPYRIGHT and LICENSE
2051
2052This program is free and open software. You may use, modify,
2053distribute, and sell this program (and any modified variants) in any
2054way you wish, provided you do not restrict others from doing the same.
2055
2056=cut
2057
a687059c 2058!NO!SUBS!
4633a7c4
LW
2059
2060close OUT or die "Can't close $file: $!";
2061chmod 0755, $file or die "Can't reset permissions for $file: $!\n";
d38b618a 2062unlink 'psed';
96021e87 2063print "Linking $file to psed.\n";
d38b618a 2064if (defined $Config{d_link}) {
96021e87 2065 link $file, 'psed';
d38b618a
A
2066} else {
2067 unshift @INC, '../lib';
2068 require File::Copy;
2069 File::Copy::syscopy('s2p', 'psed');
2070}
4633a7c4 2071exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';
8a5546a1 2072chdir $origdir;