Revert change #28980 per Jarkko's suggestion

[perl5.git] / ext / Encode / bin / enc2xs
diff --git a/ext/Encode/bin/enc2xs b/ext/Encode/bin/enc2xs

index 1acb613..95067c8 100644 (file)
--- a/ext/Encode/bin/enc2xs
+++ b/ext/Encode/bin/enc2xs
@@ -1,17 +1,21 @@
-#!../../../perl -w
+#!./perl
  BEGIN {
-    unshift @INC, qw(../../lib ../../../lib ../../../../lib);
-    $ENV{PATH} .= ';../..;../../..;../../../..' if $^O eq 'MSWin32';
+    # @INC poking  no longer needed w/ new MakeMaker and Makefile.PL's
+    # with $ENV{PERL_CORE} set
+    # In case we need it in future...
+    require Config; import Config;
  }
  use strict;
+use warnings;
  use Getopt::Std;
+use Config;
  my @orig_ARGV = @ARGV;
-our $VERSION  = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-
+our $VERSION  = do { my @r = (q$Revision: 2.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
  
  # These may get re-ordered.
  # RAW is a do_now as inserted by &enter
  # AGG is an aggreagated do_now, as built up by &process
+
  use constant {
    RAW_NEXT => 0,
    RAW_IN_LEN => 1,
@@ -26,6 +30,7 @@ use constant {
    AGG_OUT_LEN => 5,
    AGG_FALLBACK => 6,
  };
+
  # (See the algorithm in encengine.c - we're building structures for it)
  
  # There are two sorts of structures.
@@ -128,9 +133,10 @@ my %opt;
  # -o <output> to specify the output file name (else it's the first arg)
  # -f <inlist> to give a file with a list of input files (else use the args)
  # -n <name> to name the encoding (else use the basename of the input file.
-getopts('M:SQqOo:f:n:',\%opt);
+getopts('CM:SQqOo:f:n:',\%opt);
  
  $opt{M} and make_makefile_pl($opt{M}, @ARGV);
+$opt{C} and make_configlocal_pm($opt{C}, @ARGV);
  
  # This really should go first, else the die here causes empty (non-erroneous)
  # output files to be written.
@@ -154,10 +160,10 @@ my $hname = $cname;
  
  my ($doC,$doEnc,$doUcm,$doPet);
  
-if ($cname =~ /\.(c|xs)$/)
+if ($cname =~ /\.(c|xs)$/i) # VMS may have upcased filenames with DECC$ARGV_PARSE_STYLE defined
   {
    $doC = 1;
-  $dname =~ s/(\.[^\.]*)?$/_def.h/;
+  $dname =~ s/(\.[^\.]*)?$/.exh/;
    chmod(0666,$dname) if -f $cname && !-w $dname;
    open(D,">$dname") || die "Cannot open $dname:$!";
    $hname =~ s/(\.[^\.]*)?$/.h/;
@@ -171,6 +177,7 @@ if ($cname =~ /\.(c|xs)$/)
   !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!!
   This file was autogenerated by:
   $^X $0 @orig_ARGV
+ enc2xs VERSION $VERSION
  */
  END
    }
@@ -182,7 +189,7 @@ END
      print C "#include <XSUB.h>\n";
      print C "#define U8 U8\n";
     }
-  print C "#include \"encode.h\"\n";
+  print C "#include \"encode.h\"\n\n";
  
   }
  elsif ($cname =~ /\.enc$/)
@@ -200,6 +207,9 @@ elsif ($cname =~ /\.pet$/)
  
  my %encoding;
  my %strings;
+my $string_acc;
+my %strings_in_acc;
+
  my $saved = 0;
  my $subsave = 0;
  my $strings = 0;
@@ -246,18 +256,41 @@ if ($doC)
    foreach my $name (sort cmp_name keys %encoding)
     {
      my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
-    output(\*C,$name.'_utf8',$e2u);
-    output(\*C,'utf8_'.$name,$u2e);
-    push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep));
+    process($name.'_utf8',$e2u);
+    addstrings(\*C,$e2u);
+
+    process('utf8_'.$name,$u2e);
+    addstrings(\*C,$u2e);
+   }
+  outbigstring(\*C,"enctable");
+  foreach my $name (sort cmp_name keys %encoding)
+   {
+    my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
+    outtable(\*C,$e2u, "enctable");
+    outtable(\*C,$u2e, "enctable");
+
+    # push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep));
     }
    foreach my $enc (sort cmp_name keys %encoding)
     {
-    my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}};
-    my @info = ($e2u->{Cname},$u2e->{Cname},$rsym,length($rep),$min_el,$max_el);
+    # my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}};
+    my ($e2u,$u2e,$rep,$min_el,$max_el) = @{$encoding{$enc}};
+    #my @info = ($e2u->{Cname},$u2e->{Cname},$rsym,length($rep),$min_el,$max_el);
+    my $replen = 0; 
+    $replen++ while($rep =~ /\G\\x[0-9A-Fa-f]/g);
      my $sym = "${enc}_encoding";
      $sym =~ s/\W+/_/g;
-    print C "encode_t $sym = \n";
-    print C " {",join(',',@info,"{\"$enc\",(const char *)0}"),"};\n\n";
+    my @info = ($e2u->{Cname},$u2e->{Cname},"${sym}_rep_character",$replen,
+        $min_el,$max_el);
+    print C "static const U8 ${sym}_rep_character[] = \"$rep\";\n";
+    print C "static const char ${sym}_enc_name[] = \"$enc\";\n\n";
+    print C "const encode_t $sym = \n";
+    # This is to make null encoding work -- dankogai
+    for (my $i = (scalar @info) - 1;  $i >= 0; --$i){
+    $info[$i] ||= 1;
+    }
+    # end of null tweak -- dankogai
+    print C " {",join(',',@info,"{${sym}_enc_name,(const char *)0}"),"};\n\n";
     }
  
    foreach my $enc (sort cmp_name keys %encoding)
@@ -303,8 +336,8 @@ END
    close(D) or warn "Error closing '$dname': $!";
    close(H) or warn "Error closing '$hname': $!";
  
-  my $perc_saved    = $strings/($strings + $saved) * 100;
-  my $perc_subsaved = $strings/($strings + $subsave) * 100;
+  my $perc_saved    = $saved/($strings + $saved) * 100;
+  my $perc_subsaved = $subsave/($strings + $subsave) * 100;
    printf STDERR "%d bytes in string tables\n",$strings;
    printf STDERR "%d bytes (%.3g%%) saved spotting duplicates\n",
      $saved, $perc_saved              if $saved;
@@ -364,10 +397,12 @@ sub compile_ucm
   my $min_el;
   if (exists $attr{'subchar'})
    {
-   my @byte;
-   $attr{'subchar'} =~ /^\s*/cg;
-   push(@byte,$1) while $attr{'subchar'} =~ /\G\\x([0-9a-f]+)/icg;
-   $erep = join('',map(chr(hex($_)),@byte));
+   #my @byte;
+   #$attr{'subchar'} =~ /^\s*/cg;
+   #push(@byte,$1) while $attr{'subchar'} =~ /\G\\x([0-9a-f]+)/icg;
+   #$erep = join('',map(chr(hex($_)),@byte));
+   $erep = $attr{'subchar'}; 
+   $erep =~ s/^\s+//; $erep =~ s/\s+$//;
    }
   print "Reading $name ($cs)\n";
   my $nfb = 0;
@@ -377,16 +412,18 @@ sub compile_ucm
     s/#.*$//;
     last if /^\s*END\s+CHARMAP\s*$/i;
     next if /^\s*$/;
-   my ($u,@byte);
-   my $fb = '';
-   $u = $1 if (/^<U([0-9a-f]+)>\s+/igc);
-   push(@byte,$1) while /\G\\x([0-9a-f]+)/igc;
-   $fb = $1 if /\G\s*(\|[0-3])/gc;
-   # warn "$_: $u @byte | $fb\n";
-   die "Bad line:$_" unless /\G\s*(#.*)?$/gc;
-   if (defined($u))
+   my (@uni, @byte) = ();
+   my ($uni, $byte, $fb) = m/^(\S+)\s+(\S+)\s+(\S+)\s+/o
+       or die "Bad line: $_";
+   while ($uni =~  m/\G<([U0-9a-fA-F\+]+)>/g){
+       push @uni, map { substr($_, 1) } split(/\+/, $1);
+   }
+   while ($byte =~ m/\G\\x([0-9a-fA-F]+)/g){
+       push @byte, $1;
+   }
+   if (@uni)
      {
-     my $uch = encode_U(hex($u));
+     my $uch =  join('', map { encode_U(hex($_)) } @uni );
       my $ech = join('',map(chr(hex($_)),@byte));
       my $el  = length($ech);
       $max_el = $el if (!defined($max_el) || $el > $max_el);
@@ -579,43 +616,6 @@ sub enter_fb0 {
    }
  }
  
-
-sub outstring
-{
- my ($fh,$name,$s) = @_;
- my $sym = $strings{$s};
- if ($sym)
-  {
-   $saved += length($s);
-  }
- else
-  {
-   if ($opt{'O'}) {
-       foreach my $o (keys %strings)
-        {
-         next unless (my $i = index($o,$s)) >= 0;
-         $sym = $strings{$o};
-         # gcc things that 0x0e+0x10 (anything with e+) starts to look like
-         # a hexadecimal floating point constant. Silly gcc. Only p
-         # introduces a floating point constant. Put the space in to stop it
-         # getting confused.
-         $sym .= sprintf(" +0x%02x",$i) if ($i);
-         $subsave += length($s);
-         return $strings{$s} = $sym;
-       }
-   }
-   $strings{$s} = $sym = $name;
-   $strings += length($s);
-   my $definition = sprintf "static const U8 %s[%d] = { ",$name,length($s);
-   # Maybe we should assert that these are all <256.
-   $definition .= join(',',unpack "C*",$s);
-   # We have a single long line. Split it at convenient commas.
-   $definition =~ s/(.{74,77},)/$1\n/g;
-   print $fh "$definition };\n\n";
-  }
- return $sym;
-}
-
  sub process
  {
    my ($name,$a) = @_;
@@ -676,7 +676,8 @@ sub process
    $a->{'Entries'} = \@ent;
  }
  
-sub outtable
+
+sub addstrings
  {
   my ($fh,$a) = @_;
   my $name = $a->{'Cname'};
@@ -684,29 +685,116 @@ sub outtable
   foreach my $b (@{$a->{'Entries'}})
    {
     next unless $b->[AGG_OUT_LEN];
-   my $s = $b->[AGG_MIN_IN];
-   my $e = $b->[AGG_MAX_IN];
-   outstring($fh,sprintf("%s__%02x_%02x",$name,$s,$e),$b->[AGG_OUT_BYTES]);
+   $strings{$b->[AGG_OUT_BYTES]} = undef;
    }
   if ($a->{'Forward'})
    {
-   print $fh "\nstatic encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n";
+   my $cpp = ($Config{d_cplusplus} || '') eq 'define';
+   my $var = $^O eq 'MacOS' || $cpp ? 'extern' : 'static';
+   my $const = $cpp ? '' : 'const';
+   print $fh "$var $const encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n";
+  }
+ $a->{'DoneStrings'} = 1;
+ foreach my $b (@{$a->{'Entries'}})
+  {
+   my ($s,$e,$out,$t,$end,$l) = @$b;
+   addstrings($fh,$t) unless $t->{'DoneStrings'};
+  }
+}
+
+sub outbigstring
+{
+  my ($fh,$name) = @_;
+
+  $string_acc = '';
+
+  # Make the big string in the string accumulator. Longest first, on the hope
+  # that this makes it more likely that we find the short strings later on.
+  # Not sure if it helps sorting strings of the same length lexcically.
+  foreach my $s (sort {length $b <=> length $a || $a cmp $b} keys %strings) {
+    my $index = index $string_acc, $s;
+    if ($index >= 0) {
+      $saved += length($s);
+      $strings_in_acc{$s} = $index;
+    } else {
+    OPTIMISER: {
+    if ($opt{'O'}) {
+      my $sublength = length $s;
+      while (--$sublength > 0) {
+        # progressively lop characters off the end, to see if the start of
+        # the new string overlaps the end of the accumulator.
+        if (substr ($string_acc, -$sublength)
+        eq substr ($s, 0, $sublength)) {
+          $subsave += $sublength;
+          $strings_in_acc{$s} = length ($string_acc) - $sublength;
+          # append the last bit on the end.
+          $string_acc .= substr ($s, $sublength);
+          last OPTIMISER;
+        }
+        # or if the end of the new string overlaps the start of the
+        # accumulator
+        next unless substr ($string_acc, 0, $sublength)
+          eq substr ($s, -$sublength);
+        # well, the last $sublength characters of the accumulator match.
+        # so as we're prepending to the accumulator, need to shift all our
+        # existing offsets forwards
+        $_ += $sublength foreach values %strings_in_acc;
+        $subsave += $sublength;
+        $strings_in_acc{$s} = 0;
+        # append the first bit on the start.
+        $string_acc = substr ($s, 0, -$sublength) . $string_acc;
+        last OPTIMISER;
+      }
+    }
+    # Optimiser (if it ran) found nothing, so just going have to tack the
+    # whole thing on the end.
+    $strings_in_acc{$s} = length $string_acc;
+    $string_acc .= $s;
+      };
+    }
    }
+
+  $strings = length $string_acc;
+  my $cpp = ($Config{d_cplusplus} || '') eq 'define';
+  my $var = $cpp ? '' : 'static';
+  my $definition = "\n$var const U8 $name\[$strings] = { " .
+    join(',',unpack "C*",$string_acc);
+  # We have a single long line. Split it at convenient commas.
+  print $fh $1, "\n" while $definition =~ /\G(.{74,77},)/gcs;
+  print $fh substr ($definition, pos $definition), " };\n";
+}
+
+sub findstring {
+  my ($name,$s) = @_;
+  my $offset = $strings_in_acc{$s};
+  die "Can't find string " . join (',',unpack "C*",$s) . " in accumulator"
+    unless defined $offset;
+  "$name + $offset";
+}
+
+sub outtable
+{
+ my ($fh,$a,$bigname) = @_;
+ my $name = $a->{'Cname'};
   $a->{'Done'} = 1;
   foreach my $b (@{$a->{'Entries'}})
    {
     my ($s,$e,$out,$t,$end,$l) = @$b;
-   outtable($fh,$t) unless $t->{'Done'};
+   outtable($fh,$t,$bigname) unless $t->{'Done'};
    }
- print $fh "\nstatic encpage_t $name\[",scalar(@{$a->{'Entries'}}),"] = {\n";
+ my $cpp = ($Config{d_cplusplus} || '') eq 'define';
+ my $var = $cpp ? '' : 'static';
+ my $const = $cpp ? '' : 'const';
+ print $fh "\n$var $const encpage_t $name\[",
+   scalar(@{$a->{'Entries'}}), "] = {\n";
   foreach my $b (@{$a->{'Entries'}})
    {
     my ($sc,$ec,$out,$t,$end,$l,$fb) = @$b;
-   $end |= 0x80 if $fb;
+   # $end |= 0x80 if $fb; # what the heck was on your mind, Nick?  -- Dan
     print  $fh "{";
     if ($l)
      {
-     printf $fh outstring($fh,'',$out);
+     printf $fh findstring($bigname,$out);
      }
     else
      {
@@ -718,14 +806,6 @@ sub outtable
   print $fh "};\n";
  }
  
-sub output
-{
- my ($fh,$name,$a) = @_;
- process($name,$a);
- # Sub-tables
- outtable($fh,$a);
-}
-
  sub output_enc
  {
   my ($fh,$name,$a) = @_;
@@ -828,215 +908,148 @@ sub output_ucm
   print $fh "END CHARMAP\n";
  }
  
-sub make_makefile_pl
-{
-    eval { require Encode; };
-    $@ and die "You need to install Encode to use enc2xs -M\nerror: $@\n";
-    eval { require File::Basename; };
-    $@ and die "File::Basename needed.  Are you on miniperl?;\nerror: $@\n";
-    File::Basename->import();
-    my $inc = dirname($INC{"Encode/Internal.pm"});
-    my $name = shift;
-    my $table_files = join(",", map {qq('$_')} @_);
-    my $now = scalar localtime();
-    open my $fh, ">Makefile.PL" or die "$!";
-    print $fh <<"END_OF_HEADER";
-#
-# This file is auto-generated by:
-# $0
-# $now
-#
-use 5.7.2;
-use strict;
-use ExtUtils::MakeMaker;
-
-# Please edit the following to the taste!
-my \$name = '$name';
-my \%tables = (
-             encode_t   => [ $table_files ],
-             );
-
-# And leave the rest!
-my \$enc2xs = '$0';
-WriteMakefile(
-              INC              => "-I$inc",
-END_OF_HEADER
-
-    print $fh <<'END_OF_MAKEFILE_PL';
-             NAME              => 'Encode::'.$name,
-             VERSION_FROM      => "$name.pm",
-             OBJECT            => '$(O_FILES)',
-             'dist'            => {
-                 COMPRESS      => 'gzip -9f',
-                 SUFFIX        => 'gz',
-                 DIST_DEFAULT => 'all tardist',
-             },
-             MAN3PODS  => {},
-             # OS 390 winges about line numbers > 64K ???
-             XSOPT => '-nolinenumbers',
-             );
-
-package MY;
-
-sub post_initialize
-{
-    my ($self) = @_;
-    my %o;
-    my $x = $self->{'OBJ_EXT'};
-    # Add the table O_FILES
-    foreach my $e (keys %tables)
-    {
-       $o{$e.$x} = 1;
+use vars qw(
+    $_Enc2xs
+    $_Version
+    $_Inc
+    $_E2X 
+    $_Name
+    $_TableFiles
+    $_Now
+);
+
+sub find_e2x{
+    eval { require File::Find; };
+    my (@inc, %e2x_dir);
+    for my $inc (@INC){
+    push @inc, $inc unless $inc eq '.'; #skip current dir
      }
-    $o{"$name$x"} = 1;
-    $self->{'O_FILES'} = [sort keys %o];
-    my @files = ("$name.xs");
-    $self->{'C'} = ["$name.c"];
-    # $self->{'H'} = [$self->catfile($self->updir,'encode.h')];
-    my %xs;
-    foreach my $table (keys %tables) {
-       push (@{$self->{'C'}},"$table.c");
-       # Do NOT add $table.h etc. to H_FILES unless we own up as to how they
-       # get built.
-       foreach my $ext (qw($(OBJ_EXT) .c .h _def.h .fnm)) {
-           push (@files,$table.$ext);
-       }
-    }
-    $self->{'XS'} = { "$name.xs" => "$name.c" };
-    $self->{'clean'}{'FILES'} .= join(' ',@files);
-    open(XS,">$name.xs") || die "Cannot open $name.xs:$!";
-    print XS <<'END';
-#include <EXTERN.h>
-#include <perl.h>
-#include <XSUB.h>
-#define U8 U8
-#include "encode.h"
-END
-    foreach my $table (keys %tables) {
-       print XS qq[#include "${table}.h"\n];
+    File::Find::find(
+         sub {
+         my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
+             $atime,$mtime,$ctime,$blksize,$blocks)
+             = lstat($_) or return;
+         -f _ or return;
+         if (/^.*\.e2x$/o){
+             no warnings 'once';
+             $e2x_dir{$File::Find::dir} ||= $mtime;
+         }
+         return;
+         }, @inc);
+    warn join("\n", keys %e2x_dir), "\n";
+    for my $d (sort {$e2x_dir{$a} <=> $e2x_dir{$b}} keys %e2x_dir){
+    $_E2X = $d;
+    # warn "$_E2X => ", scalar localtime($e2x_dir{$d});
+    return $_E2X;
      }
-    print XS <<"END";
-
-static void
-Encode_XSEncoding(pTHX_ encode_t *enc)
-{
- dSP;
- HV *stash = gv_stashpv("Encode::XS", TRUE);
- SV *sv    = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
- int i = 0;
- PUSHMARK(sp);
- XPUSHs(sv);
- while (enc->name[i])
-  {
-   const char *name = enc->name[i++];
-   XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
-  }
- PUTBACK;
- call_pv("Encode::define_encoding",G_DISCARD);
- SvREFCNT_dec(sv);
  }
  
-MODULE = Encode::$name PACKAGE = Encode::$name
-PROTOTYPES: DISABLE
-BOOT:
+sub make_makefile_pl
  {
-END
-    foreach my $table (keys %tables) {
-       print XS qq[#include "${table}_def.h"\n];
-    }
-    print XS "}\n";
-    close(XS);
-    return "# Built $name.xs\n\n";
+    eval { require Encode; };
+    $@ and die "You need to install Encode to use enc2xs -M\nerror: $@\n";
+    # our used for variable expanstion
+    $_Enc2xs = $0;
+    $_Version = $VERSION;
+    $_E2X = find_e2x();
+    $_Name = shift;
+    $_TableFiles = join(",", map {qq('$_')} @_);
+    $_Now = scalar localtime();
+
+    eval { require File::Spec; };
+    _print_expand(File::Spec->catfile($_E2X,"Makefile_PL.e2x"),"Makefile.PL");
+    _print_expand(File::Spec->catfile($_E2X,"_PM.e2x"),        "$_Name.pm");
+    _print_expand(File::Spec->catfile($_E2X,"_T.e2x"),         "t/$_Name.t");
+    _print_expand(File::Spec->catfile($_E2X,"README.e2x"),     "README");
+    _print_expand(File::Spec->catfile($_E2X,"Changes.e2x"),    "Changes");
+    exit;
  }
  
-sub postamble
-{
-    my $self = shift;
-    my $dir  = "."; # $self->catdir('Encode');
-    my $str  = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n";
-    $str    .= "$name.c : $name.xs ";
-    foreach my $table (keys %tables)
-    {
-       $str .= " $table.c";
-    }
-    $str .= "\n\n";
-    $str .= "$name\$(OBJ_EXT) : $name.c\n\n";
+use vars qw(
+        $_ModLines
+        $_LocalVer
+        );
  
-    foreach my $table (keys %tables)
-    {
-       my $numlines = 1;
-       my $lengthsofar = length($str);
-       my $continuator = '';
-       $str .= "$table.c : Makefile.PL";
-       foreach my $file (@{$tables{$table}})
-       {
-           $str .= $continuator.' '.$self->catfile($dir,$file);
-           if ( length($str)-$lengthsofar > 128*$numlines )
-           {
-               $continuator .= " \\\n\t";
-               $numlines++;
-           } else {
-               $continuator = '';
-           }
-       }
-        $str .= $^O eq 'VMS' # In VMS quote to preserve case
-            ? qq{\n\t\$(PERL) $enc2xs -"Q" -"O" -o \$\@ -f $table.fnm\n\n}
-            : qq{\n\t\$(PERL) $enc2xs -Q -O -o \$\@ -f $table.fnm\n\n};
-       open (FILELIST, ">$table.fnm")
-           || die "Could not open $table.fnm: $!";
-       foreach my $file (@{$tables{$table}})
-       {
-           print FILELIST $self->catfile($dir,$file) . "\n";
+sub make_configlocal_pm {
+    eval { require Encode; };
+    $@ and die "Unable to require Encode: $@\n";
+    eval { require File::Spec; };
+
+    # our used for variable expanstion
+    my %in_core = map { $_ => 1 } (
+        'ascii',      'iso-8859-1', 'utf8',
+        'ascii-ctrl', 'null',       'utf-8-strict'
+    );
+    my %LocalMod = ();
+    # check @enc;
+    use File::Find ();
+    my $wanted = sub{
+       -f $_ or return;
+       $File::Find::name =~ /\A\./        and return;
+       $File::Find::name =~ /\.pm\z/      or  return;
+       $File::Find::name =~ m/\bEncode\b/ or  return;
+       my $mod = $File::Find::name;
+       $mod =~ s/.*\bEncode\b/Encode/o;
+       $mod =~ s/\.pm\z//o;
+       $mod =~ s,/,::,og;
+       warn qq{ require $mod;\n};
+       eval qq{ require $mod; };
+       $@ and die "Can't require $mod: $@\n";
+       for my $enc ( Encode->encodings() ) {
+           no warnings;
+           $in_core{$enc}                   and next;
+           $Encode::Config::ExtModule{$enc} and next;
+           $LocalMod{$enc} ||= $mod;
         }
-       close(FILELIST);
+    };
+    File::Find::find({wanted => $wanted}, @INC);
+    $_ModLines = "";
+    for my $enc ( sort keys %LocalMod ) {
+        $_ModLines .=
+          qq(\$Encode::ExtModule{'$enc'} = "$LocalMod{$enc}";\n);
      }
-    return $str;
-}
-END_OF_MAKEFILE_PL
-    close $fh;
-    (my $pm =<<"END_OF_PM") =~ s/^# //gm;
-# package Encode::$name;
-# our \$VERSION = "0.01";
-# 
-# use Encode;
-# use XSLoader;
-# XSLoader::load('Encode::$name', \$VERSION);
-# 
-# 1;
-# __END__
-# 
-# =head1 NAME
-# 
-# Encode::$name - New Encoding
-# 
-# =head1 SYNOPSIS
-#
-# You got to fill this in!
-# 
-# =head1 SEE ALSO
-# 
-# L<Encode>
-# 
-# =cut
-END_OF_PM
-    open $fh, ">$name.pm" or die "$name.pm:$!";
-    print $fh $pm;
-    close $fh;
-    -d 't' or mkdir 't', 0755 or die "mkdir t:$!";
-    open $fh, ">t/$name.t" or die "t/$name.t:$!";
-print $fh <<"END_OF_TEST";
-use strict;
-# Adjust the number here!
-use Test::More tests => 2;
-
-use_ok('Encode');
-use_ok('Encode::$name');
-# Add more test here!
-END_OF_TEST
-    close $fh;
+    warn $_ModLines;
+    $_LocalVer = _mkversion();
+    $_E2X      = find_e2x();
+    $_Inc      = $INC{"Encode.pm"};
+    $_Inc =~ s/\.pm$//o;
+    _print_expand( File::Spec->catfile( $_E2X, "ConfigLocal_PM.e2x" ),
+        File::Spec->catfile( $_Inc, "ConfigLocal.pm" ), 1 );
      exit;
  }
  
+sub _mkversion{
+    # v-string is now depreciated; use time() instead;
+    #my ($ss,$mm,$hh,$dd,$mo,$yyyy) = localtime();
+    #$yyyy += 1900, $mo +=1;
+    #return sprintf("v%04d.%04d.%04d", $yyyy, $mo*100+$dd, $hh*100+$mm);
+    return time();
+}
+
+sub _print_expand{
+    eval { require File::Basename; };
+    $@ and die "File::Basename needed.  Are you on miniperl?;\nerror: $@\n";
+    File::Basename->import();
+    my ($src, $dst, $clobber) = @_;
+    if (!$clobber and -e $dst){
+    warn "$dst exists. skipping\n";
+    return;
+    }
+    warn "Generating $dst...\n";
+    open my $in, $src or die "$src : $!";
+    if ((my $d = dirname($dst)) ne '.'){
+    -d $d or mkdir $d, 0755 or die  "mkdir $d : $!";
+    }     
+    open my $out, ">$dst" or die "$!";
+    my $asis = 0;
+    while (<$in>){ 
+    if (/^#### END_OF_HEADER/){
+        $asis = 1; next;
+    }    
+    s/(\$_[A-Z][A-Za-z0-9]+)_/$1/gee unless $asis;
+    print $out $_;
+    }
+}
  __END__
  
  =head1 NAME
@@ -1045,31 +1058,32 @@ enc2xs -- Perl Encode Module Generator
  
  =head1 SYNOPSIS
  
-  enc2xs -M ModName mapfiles...
    enc2xs -[options]
+  enc2xs -M ModName mapfiles...
+  enc2xs -C
  
  =head1 DESCRIPTION
  
  F<enc2xs> builds a Perl extension for use by Encode from either
-Unicode Character Mapping files (.ucm) or Tcl Encoding Files
-(.enc)  Besides internally used during the build process of Encode
-module, you can use F<enc2xs> to add your own encoding to perl.  No
-knowledge on XS is necessary.
+Unicode Character Mapping files (.ucm) or Tcl Encoding Files (.enc).
+Besides being used internally during the build process of the Encode
+module, you can use F<enc2xs> to add your own encoding to perl.
+No knowledge of XS is necessary.
  
  =head1 Quick Guide
  
-If what you want to know as little about Perl possible but needs to
+If you want to know as little about Perl as possible but need to
  add a new encoding, just read this chapter and forget the rest.
  
  =over 4
  
  =item 0.
  
-Have a .ucm file ready.  You can get it from somewhere or you can
-write your own from scratch or you can grab one from Encode
-distribution and customize.  For UCM format, see the next Chapter.
-In the example below, I'll call my theoretical encoding myascii, 
-defined inI<my.ucm>.  C<$> is a shell prompt.
+Have a .ucm file ready.  You can get it from somewhere or you can write
+your own from scratch or you can grab one from the Encode distribution
+and customize it.  For the UCM format, see the next Chapter.  In the
+example below, I'll call my theoretical encoding myascii, defined
+in I<my.ucm>.  C<$> is a shell prompt.
  
    $ ls -F
    my.ucm
@@ -1079,29 +1093,45 @@ defined inI<my.ucm>.  C<$> is a shell prompt.
  Issue a command as follows;
  
    $ enc2xs -M My my.ucm
+  generating Makefile.PL
+  generating My.pm
+  generating README
+  generating Changes
  
  Now take a look at your current directory.  It should look like this.
  
    $ ls -F
    Makefile.PL   My.pm         my.ucm        t/
  
-The following files are created.
+The following files were created.
+
+  Makefile.PL - MakeMaker script
+  My.pm       - Encode submodule
+  t/My.t      - test file
+
+=over 4
+
+=item 1.1.
  
-  Makefle.PL - MakeMaker script
-  My.pm      - Encode Submodule
-  t/My.t     - test file
+If you want *.ucm installed together with the modules, do as follows;
+
+  $ mkdir Encode
+  $ mv *.ucm Encode
+  $ enc2xs -M My Encode/*ucm
+
+=back
  
  =item 2.
  
  Edit the files generated.  You don't have to if you have no time AND no
  intention to give it to someone else.  But it is a good idea to edit
-pod and add more tests.
+the pod and to add more tests.
  
  =item 3.
  
-Now issue a command all Perl Mongers love;
+Now issue a command all Perl Mongers love:
  
-  $ perl5.7.3 Makefile.PL
+  $ perl Makefile.PL
    Writing Makefile for Encode::My
  
  =item 4.
@@ -1115,15 +1145,15 @@ Now all you have to do is make.
    Reading myascii (myascii)
    Writing compiled form
    128 bytes in string tables
-  384 bytes (25%) saved spotting duplicates
-  1 bytes (99.2%) saved using substrings
+  384 bytes (75%) saved spotting duplicates
+  1 bytes (0.775%) saved using substrings
    ....
    chmod 644 blib/arch/auto/Encode/My/My.bs
    $
  
-The time it takes varies how fast your machine is and how large your
-encoding is.  Unless you are working on something big like euc-tw, it
-won't take too long.
+The time it takes varies depending on how fast your machine is and
+how large your encoding is.  Unless you are working on something big
+like euc-tw, it won't take too long.
  
  =item 5.
  
@@ -1142,17 +1172,27 @@ You can "make install" already but you should test first.
  
  If you are content with the test result, just "make install"
  
+=item 7.
+
+If you want to add your encoding to Encode's demand-loading list
+(so you don't have to "use Encode::YourEncoding"), run
+
+  enc2xs -C
+
+to update Encode::ConfigLocal, a module that controls local settings.
+After that, "use Encode;" is enough to load your encodings on demand.
+
  =back
  
  =head1 The Unicode Character Map
  
-Encode uses The Unicode Character Map (UCM) for source character
-mappings.  This format is used by ICU package of IBM and adopted by
-Nick Ing-Simmons.  Since UCM is more flexible than Tcl's Encoding Map
-and far more user-friendly,  This is the recommended formet for
-Encode now.
+Encode uses the Unicode Character Map (UCM) format for source character
+mappings.  This format is used by IBM's ICU package and was adopted
+by Nick Ing-Simmons for use with the Encode module.  Since UCM is
+more flexible than Tcl's Encoding Map and far more user-friendly,
+this is the recommended format for Encode now.
  
-UCM file looks like this.
+A UCM file looks like this.
  
    #
    # Comments
@@ -1178,25 +1218,25 @@ UCM file looks like this.
  
  =item *
  
-Anything that follows C<#> is treated as comments.
+Anything that follows C<#> is treated as a comment.
  
  =item *
  
-The header section continues until CHARMAP. This section Has a form of
-I<E<lt>keywordE<gt> value>, one at a line.  For a value, strings must
-be quoted. Barewords are treated as numbers.  I<\xXX> represents a
-byte.
+The header section continues until a line containing the word
+CHARMAP. This section has a form of I<E<lt>keywordE<gt> value>, one
+pair per line.  Strings used as values must be quoted. Barewords are
+treated as numbers.  I<\xXX> represents a byte.
  
  Most of the keywords are self-explanatory. I<subchar> means
  substitution character, not subcharacter.  When you decode a Unicode
  sequence to this encoding but no matching character is found, the byte
  sequence defined here will be used.  For most cases, the value here is
-\x3F, in ASCII this is a question mark.
+\x3F; in ASCII, this is a question mark.
  
  =item *
  
  CHARMAP starts the character map section.  Each line has a form as
-follows;
+follows:
  
    <UXXXX> \xXX.. |0 # comment
      ^     ^      ^
@@ -1204,20 +1244,21 @@ follows;
      |     +-------- Encoded byte sequence
      +-------------- Unicode Character ID in hex
  
-The format is roughly the same as a header section except for fallback
-flag.  It is | followed by 0..3.   And their meaning as follows
+The format is roughly the same as a header section except for the
+fallback flag: | followed by 0..3.   The meaning of the possible
+values is as follows:
  
-=over 2
+=over 4
  
  =item |0 
  
-Round trip safe.   A character decoded to Unicode encodes back to the
-same byte sequence. most character belong to this.
+Round trip safe.  A character decoded to Unicode encodes back to the
+same byte sequence.  Most characters have this flag.
  
  =item |1
  
  Fallback for unicode -> encoding.  When seen, enc2xs adds this
-character for encode map only
+character for the encode map only.
  
  =item |2 
  
@@ -1226,7 +1267,7 @@ Skip sub-char mapping should there be no code point.
  =item |3 
  
  Fallback for encoding -> unicode.  When seen, enc2xs adds this
-character for decode map only
+character for the decode map only.
  
  =back
  
@@ -1236,30 +1277,91 @@ And finally, END OF CHARMAP ends the section.
  
  =back
  
-Needless to say, if you are manually creating a UCM file, you should
-copy ascii.ucm or existing encoding which is close to yours than write
-your own from scratch. 
+When you are manually creating a UCM file, you should copy ascii.ucm
+or an existing encoding which is close to yours, rather than write
+your own from scratch.
  
  When you do so, make sure you leave at least B<U0000> to B<U0020> as
-is, unless your environment is on EBCDIC.
+is, unless your environment is EBCDIC.
  
  B<CAVEAT>: not all features in UCM are implemented.  For example,
  icu:state is not used.  Because of that, you need to write a perl
-module if you want to support algorithmical encodings, notablly
-ISO-2022 series.  Such modules include L<Encode::JP::2022_JP>,
+module if you want to support algorithmical encodings, notably
+the ISO-2022 series.  Such modules include L<Encode::JP::2022_JP>,
  L<Encode::KR::2022_KR>, and L<Encode::TW::HZ>.
  
+=head2 Coping with duplicate mappings
+
+When you create a map, you SHOULD make your mappings round-trip safe.
+That is, C<encode('your-encoding', decode('your-encoding', $data)) eq
+$data> stands for all characters that are marked as C<|0>.  Here is
+how to make sure:
+
+=over 4
+
+=item * 
+
+Sort your map in Unicode order.
+
+=item *
+
+When you have a duplicate entry, mark either one with '|1' or '|3'.
+  
+=item * 
+
+And make sure the '|1' or '|3' entry FOLLOWS the '|0' entry.
+
+=back
+
+Here is an example from big5-eten.
+
+  <U2550> \xF9\xF9 |0
+  <U2550> \xA2\xA4 |3
+
+Internally Encoding -> Unicode and Unicode -> Encoding Map looks like
+this;
+
+  E to U               U to E
+  --------------------------------------
+  \xF9\xF9 => U2550    U2550 => \xF9\xF9
+  \xA2\xA4 => U2550
+ 
+So it is round-trip safe for \xF9\xF9.  But if the line above is upside
+down, here is what happens.
+
+  E to U               U to E
+  --------------------------------------
+  \xA2\xA4 => U2550    U2550 => \xF9\xF9
+  (\xF9\xF9 => U2550 is now overwritten!)
+
+The Encode package comes with F<ucmlint>, a crude but sufficient
+utility to check the integrity of a UCM file.  Check under the
+Encode/bin directory for this.
+
+When in doubt, you can use F<ucmsort>, yet another utility under
+Encode/bin directory.
+
  =head1 Bookmarks
  
+=over 4
+
+=item *
+
  ICU Home Page 
  L<http://oss.software.ibm.com/icu/>
  
+=item *
+
  ICU Character Mapping Tables
  L<http://oss.software.ibm.com/icu/charset/>
  
+=item *
+
  ICU:Conversion Data
  L<http://oss.software.ibm.com/icu/userguide/conversion-data.html>
  
+=back
+
  =head1 SEE ALSO
  
  L<Encode>,