use strict;
use Carp ();
use vars qw($VERSION );
-$VERSION = '3.20';
+$VERSION = '3.23';
#use constant DEBUG => 7;
BEGIN {
require Pod::Simple;
}
}
+ if(!$self->parse_characters && !$self->{'encoding'}) {
+ $self->_try_encoding_guess($line)
+ }
DEBUG > 5 and print "# Parsing line: [$line]\n";
# HERE WE CATCH =encoding EARLY!
if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
+ next if $self->parse_characters; # Ignore this line
$line = $self->_handle_encoding_line( $line );
}
sub _handle_encoding_line {
my($self, $line) = @_;
+ return if $self->parse_characters;
+
# The point of this routine is to set $self->{'_transcoder'} as indicated.
return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
return;
}
+sub _try_encoding_guess {
+ my ($self,$line) = @_;
+
+ if(!$self->{'in_pod'} and $line !~ /^=/m) {
+ return; # don't whine about non-ASCII bytes in code/comments
+ }
+
+ return unless $line =~ /[^\x00-\x7f]/; # Look for non-ASCII byte
+
+ my $encoding = $line =~ /[\xC0-\xFD][\x80-\xBF]/ ? 'UTF-8' : 'ISO8859-1';
+ $self->_handle_encoding_line( "=encoding $encoding" );
+ $self->{'_transcoder'} && $self->{'_transcoder'}->($line);
+
+ my ($word) = $line =~ /(\S*[^\x00-\x7f]\S*)/;
+
+ $self->whine(
+ $self->{'line_count'},
+ "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
+ );
+
+}
+
#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
{
my $scratch;
$self->_handle_element_start(($scratch=$name), shift @_);
- foreach my $x (@_) {
- if(ref($x)) {
+ while (@_) {
+ my $x = shift;
+ if (ref($x)) {
&_traverse_treelet_bit($self, @$x);
} else {
+ $x .= shift while @_ && !ref($_[0]);
$self->_handle_text($x);
}
}