This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade to Pod-Simple 3.23
[perl5.git] / cpan / Pod-Simple / lib / Pod / Simple / BlackBox.pm
CommitLineData
351625bd
SP
1
2package Pod::Simple::BlackBox;
3#
4# "What's in the box?" "Pain."
5#
6###########################################################################
7#
8# This is where all the scary things happen: parsing lines into
9# paragraphs; and then into directives, verbatims, and then also
10# turning formatting sequences into treelets.
11#
12# Are you really sure you want to read this code?
13#
14#-----------------------------------------------------------------------------
15#
16# The basic work of this module Pod::Simple::BlackBox is doing the dirty work
17# of parsing Pod into treelets (generally one per non-verbatim paragraph), and
18# to call the proper callbacks on the treelets.
19#
20# Every node in a treelet is a ['name', {attrhash}, ...children...]
21
22use integer; # vroom!
23use strict;
24use Carp ();
a242eeb4 25use vars qw($VERSION );
0ace302a 26$VERSION = '3.23';
9d65762f 27#use constant DEBUG => 7;
351625bd
SP
28BEGIN {
29 require Pod::Simple;
30 *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
31}
32
33#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
34
# Singular-named alias for parse_lines: every argument is handed straight
# through, and whatever parse_lines returns is returned to the caller.
sub parse_line {
  my $self = shift;
  return $self->parse_lines(@_);
}
36
37# - - - Turn back now! Run away! - - -
38
# Feed source lines to the parser.
#
# Usage: $parser->parse_lines(@lines)
#   Each element of @lines is one line of source; an undef element means
#   end-of-stream.
#
# Lines outside a pod block are passed to $self->{'code_handler'} (if set).
# Lines inside a pod block are grouped into paragraph records
#   [ type, { 'start_line' => N }, line, line, ... ]
# that are appended to the buffer in $self->{'paras'}.  Whenever a new
# paragraph starts, a "=cut" is seen, or the stream ends,
# _ponder_paragraph_buffer is called to process what has accumulated.
#
# Returns $self.
sub parse_lines {             # Usage: $parser->parse_lines(@lines)
  # an undef means end-of-stream
  my $self = shift;

  my $code_handler = $self->{'code_handler'};
  my $cut_handler  = $self->{'cut_handler'};
  my $wl_handler   = $self->{'whiteline_handler'};
  $self->{'line_count'} ||= 0;

  DEBUG > 4 and
   print "# Parsing starting at line ", $self->{'line_count'}, ".\n";

  DEBUG > 5 and
   print "# About to parse lines: ",
     join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";

  my $paras = ($self->{'paras'} ||= []);
   # paragraph buffer.  Because we need to defer processing of =over
   # directives and verbatim paragraphs.  We call _ponder_paragraph_buffer
   # to process this.

  $self->{'pod_para_count'} ||= 0;

  my $line;
  foreach my $source_line (@_) {
    if( $self->{'source_dead'} ) {
      DEBUG > 4 and print "# Source is dead.\n";
      last;
    }

    unless( defined $source_line ) {
      DEBUG > 4 and print "# Undef-line seen.\n";

      push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
      push @$paras, $paras->[-1], $paras->[-1];
       # So that it definitely fills the buffer.
      $self->{'source_dead'} = 1;
      $self->_ponder_paragraph_buffer;
      next;
    }


    if( $self->{'line_count'}++ ) {
      ($line = $source_line) =~ tr/\n\r//d;
       # If we don't have two vars, we'll end up with that there
       # tr/// modding the (potentially read-only) original source line!

    } else {
      # Very first line: look for a byte-order mark.
      DEBUG > 2 and print "First line: [$source_line]\n";

      if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) {
        DEBUG and print "UTF-8 BOM seen. Faking a '=encoding utf8'.\n";
        $self->_handle_encoding_line( "=encoding utf8" );
        $line =~ tr/\n\r//d;

      } elsif( $line =~ s/^\xFE\xFF//s ) {
        DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n";
        $self->scream(
          $self->{'line_count'},
          "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
        );
        # Replace the remaining input with a lone end-of-stream marker.
        splice @_;
        push @_, undef;
        next;

        # TODO: implement somehow?

      } elsif( $line =~ s/^\xFF\xFE//s ) {
        DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n";
        $self->scream(
          $self->{'line_count'},
          "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
        );
        # Replace the remaining input with a lone end-of-stream marker.
        splice @_;
        push @_, undef;
        next;

        # TODO: implement somehow?

      } else {
        DEBUG > 2 and print "First line is BOM-less.\n";
        ($line = $source_line) =~ tr/\n\r//d;
      }
    }

    # No encoding declared yet and we are handed bytes: try to guess one.
    if(!$self->parse_characters && !$self->{'encoding'}) {
      $self->_try_encoding_guess($line)
    }

    DEBUG > 5 and print "# Parsing line: [$line]\n";

    if(!$self->{'in_pod'}) {
      if($line =~ m/^=([a-zA-Z]+)/s) {
        if($1 eq 'cut') {
          $self->scream(
            $self->{'line_count'},
            "=cut found outside a pod block. Skipping to next block."
          );

          ## Before there were errata sections in the world, it was
          ## least-pessimal to abort processing the file.  But now we can
          ## just barrel on thru (but still not start a pod block).
          #splice @_;
          #push @_, undef;

          next;
        } else {
          $self->{'in_pod'} = $self->{'start_of_pod_block'}
                            = $self->{'last_was_blank'}     = 1;
          # And fall thru to the pod-mode block further down
        }
      } else {
        DEBUG > 5 and print "# It's a code-line.\n";
        # "map $_, ..." passes copies, so the handler cannot alter our values.
        $code_handler->(map $_, $line, $self->{'line_count'}, $self)
         if $code_handler;
        # Note: this may cause code to be processed out of order relative
        #  to pods, but in order relative to cuts.

        # Note also that we haven't yet applied the transcoding to $line
        #  by time we call $code_handler!

        if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
          # That RE is from perlsyn, section "Plain Old Comments (Not!)",
          #$fname = $2 if defined $2;
          #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n";
          DEBUG > 1 and print "# Setting nextline to $1\n";
          $self->{'line_count'} = $1 - 1;
        }

        next;
      }
    }

    # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
    # Else we're in pod mode:

    # Apply any necessary transcoding:
    $self->{'_transcoder'} && $self->{'_transcoder'}->($line);

    # HERE WE CATCH =encoding EARLY!
    if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
      next if $self->parse_characters;   # Ignore this line
      $line = $self->_handle_encoding_line( $line );
    }

    if($line =~ m/^=cut/s) {
      # here ends the pod block, and therefore the previous pod para
      DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n";
      $self->{'in_pod'} = 0;
      # ++$self->{'pod_para_count'};
      $self->_ponder_paragraph_buffer();
       # by now it's safe to consider the previous paragraph as done.
      $cut_handler->(map $_, $line, $self->{'line_count'}, $self)
       if $cut_handler;

      # TODO: add to docs: Note: this may cause cuts to be processed out
      #  of order relative to pods, but in order relative to code.

    } elsif($line =~ m/^(\s*)$/s) {  # it's a blank line
      if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
        $wl_handler->(map $_, $line, $self->{'line_count'}, $self)
          if $wl_handler;
      }

      if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
        DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n";
        push @{$paras->[-1]}, $line;
      }  # otherwise it's not interesting

      if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
        DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n";
      }

      $self->{'last_was_blank'} = 1;

    } elsif($self->{'last_was_blank'}) {  # A non-blank line starting a new para...

      if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
        # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
        my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
         # Note that in "=head1 foo", the WS is lost.
         # Example: ['=head1', {'start_line' => 123}, ' foo']

        ++$self->{'pod_para_count'};

        $self->_ponder_paragraph_buffer();
         # by now it's safe to consider the previous paragraph as done.

        push @$paras, $new; # the new incipient paragraph
        DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";

      } elsif($line =~ m/^\s/s) {

        if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
          DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n";
          push @{$paras->[-1]}, $line;
        } else {
          ++$self->{'pod_para_count'};
          $self->_ponder_paragraph_buffer();
           # by now it's safe to consider the previous paragraph as done.
          DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n";
          push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
        }
      } else {
        ++$self->{'pod_para_count'};
        $self->_ponder_paragraph_buffer();
         # by now it's safe to consider the previous paragraph as done.
        push @$paras, ['~Para',  {'start_line' => $self->{'line_count'}}, $line];
        DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n";
      }
      $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;

    } else {
      # It's a non-blank line /continuing/ the current para
      if(@$paras) {
        DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n";
        push @{$paras->[-1]}, $line;
      } else {
        # Unexpected case!
        die "Continuing a paragraph but \@\$paras is empty?";
      }
      $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
    }

  } # ends the big foreach loop over the source lines

  DEBUG > 1 and print(pretty(@$paras), "\n");
  return $self;
}
270
271#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
272
# Act on an "=encoding NAME" line: record the request and, when possible,
# install a transcoder in $self->{'_transcoder'}.
#
# Arguments: ($self, $line)
#
# Returns:
#   - nothing (bare return) when $self->parse_characters is true;
#   - $line unchanged when it is not a well-formed "=encoding NAME" line;
#   - the string '=encoding ALREADYDONE' otherwise.
#
# Side effects on $self for a well-formed line:
#   - "=encoding NAME" is pushed onto encoding_command_reqs;
#   - the outcome is pushed onto encoding_command_statuses
#     (undef or '' on success, an error message otherwise);
#   - 'encoding' is set to NAME if no encoding was set before.
# Dies if a transcoder is already installed or the new one fails a smoke test.
sub _handle_encoding_line {
  my($self, $line) = @_;

  return if $self->parse_characters;

  # The point of this routine is to set $self->{'_transcoder'} as indicated.

  return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
  DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n";

  my $e    = $1;
  my $orig = $e;
  push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig";

  my $enc_error;

  # Cf.   perldoc Encode   and   perldoc Encode::Supported

  require Pod::Simple::Transcode;

  if( $self->{'encoding'} ) {
    # An encoding is already in force; compare names ignoring case, '-', '_'.
    my $norm_current = $self->{'encoding'};
    my $norm_e = $e;
    foreach my $that ($norm_current, $norm_e) {
      $that =  lc($that);
      $that =~ s/[-_]//g;
    }
    if($norm_current eq $norm_e) {
      DEBUG > 1 and print "The '=encoding $orig' line is ",
       "redundant. ($norm_current eq $norm_e). Ignoring.\n";
      $enc_error = '';
       # But that doesn't necessarily mean that the earlier one went okay
    } else {
      $enc_error = "Encoding is already set to " . $self->{'encoding'};
      DEBUG > 1 and print $enc_error, "\n";
    }
  } elsif (
    # OK, let's turn on the encoding
    do {
      DEBUG > 1 and print " Setting encoding to $e\n";
      $self->{'encoding'} = $e;
      1;
    }
    and $e eq 'HACKRAW'
  ) {
    DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n";

  } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) {

    die($enc_error = "WHAT? _transcoder is already set?!")
     if $self->{'_transcoder'};   # should never happen
    $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e);
    # Smoke-test the new transcoder on throwaway data.
    eval {
      my @x = ('', "abc", "123");
      $self->{'_transcoder'}->(@x);
    };
    $@ && die( $enc_error =
      "Really unexpected error setting up encoding $e: $@\nAborting"
    );

  } else {
    my @supported = Pod::Simple::Transcode::->all_encodings;

    # Note unsupported, and complain
    DEBUG and print " Encoding [$e] is unsupported.",
      "\nSupporteds: @supported\n";
    my $suggestion = '';

    # Look for a near match:
    my $norm = lc($e);
    $norm =~ tr[-_][]d;
    my $n;
    foreach my $enc (@supported) {
      $n = lc($enc);
      $n =~ tr[-_][]d;
      next unless $n eq $norm;
      $suggestion = " (Maybe \"$e\" should be \"$enc\"?)";
      last;
    }
    my $encmodver = Pod::Simple::Transcode::->encmodver;
    $enc_error = join '' =>
      "This document probably does not appear as it should, because its ",
      "\"=encoding $e\" line calls for an unsupported encoding.",
      $suggestion, " [$encmodver\'s supported encodings are: @supported]"
    ;

    $self->scream( $self->{'line_count'}, $enc_error );
  }
  push @{ $self->{'encoding_command_statuses'} }, $enc_error;

  return '=encoding ALREADYDONE';
}
366
367# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
368
# Second pass over an =encoding paragraph taken from the paragraph buffer.
# By the time this is called, the encoding (if well formed) will already
# have been acted on, and the paragraph content replaced by the marker
# 'ALREADYDONE'.  Here we only report problems:
#   - a recorded failure status for the most recent =encoding request, or
#   - content that is not the marker, i.e. a malformed =encoding line.
# Arguments: ($self, $para).  Always returns nothing.
sub _handle_encoding_second_level {
  my($self, $para) = @_;

  # Everything after [name, attrs], joined and trimmed.
  my $directive_text = join ' ', @{$para}[2 .. $#$para];
  for ($directive_text) {
    s/^\s+//s;
    s/\s+$//s;
  }

  DEBUG > 2 and print "Ogling encoding directive: =encoding $directive_text\n";

  if($directive_text ne 'ALREADYDONE') {
    # Anything else is a syntax error
    $self->whine( $para->[1]{'start_line'},
      "Invalid =encoding syntax: $directive_text"
    );
    return;
  }

  # It's already been handled.  Check for errors.
  my $statuses = $self->{'encoding_command_statuses'};

  if(! $statuses ) {
    DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n";
  } elsif( $statuses->[-1] ) {
    $self->whine( $para->[1]{'start_line'},
      sprintf "Couldn't do %s: %s",
        $self->{'encoding_command_reqs'}[-1],
        $statuses->[-1],
    );
  } else {
    DEBUG > 2 and print " (Yup, it was successfully handled already.)\n";
  }

  return;
}
403
0ace302a
SH
# Called for a line seen before any encoding has been declared.  If the
# line contains a non-ASCII byte, pick an encoding ('UTF-8' when a byte in
# \xC0-\xFD is directly followed by one in \x80-\xBF, otherwise
# 'ISO8859-1'), feed a synthetic "=encoding ..." line to
# _handle_encoding_line, and whine about the first offending word.
# Arguments: ($self, $line).  $line is a private copy; the caller's line
# is not modified here.
sub _try_encoding_guess {
  my ($self,$line) = @_;

  # don't whine about non-ASCII bytes in code/comments
  return if !$self->{'in_pod'} and $line !~ /^=/m;

  # Nothing to guess unless there is a non-ASCII byte
  return if $line !~ /[^\x00-\x7f]/;

  my $encoding = 'ISO8859-1';
  $encoding = 'UTF-8' if $line =~ /[\xC0-\xFD][\x80-\xBF]/;

  $self->_handle_encoding_line( "=encoding $encoding" );
  if( $self->{'_transcoder'} ) {
    $self->{'_transcoder'}->($line);
  }

  # First whitespace-delimited chunk that contains a non-ASCII character
  my ($word) = $line =~ /(\S*[^\x00-\x7f]\S*)/;

  return $self->whine(
    $self->{'line_count'},
    "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
  );
}
425
351625bd
SP
426#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
427
{
my $magic_line = -321; # placeholder 'start_line' given to every generated paragraph

# Build the fake paragraphs of the "POD ERRORS" section from the messages
# collected in $self->{'errata'} (a hash of line number => list of messages).
# Returns an empty list when there are no errata; otherwise returns, in order:
# a =head1, an introductory ~Para, an =over, then for each line number (in
# ascending numeric order) an =item followed by one ~Para per message, and
# finally a =back.
sub _gen_errata {
  my $self = $_[0];

  my $errata = $self->{'errata'};
  return() unless $errata and keys %$errata;

  # Section preamble
  my @out = (
    ['=head1', {'start_line' => $magic_line, 'errata' => 1}, 'POD ERRORS'],
    ['~Para',  {'start_line' => $magic_line, '~cooked' => 1, 'errata' => 1},
      "Hey! ",
      ['B', {},
        'The above document had some coding errors, which are explained below:'
      ]
    ],
    ['=over',  {'start_line' => $magic_line, 'errata' => 1}, ''],
  );

  # One item per offending line, followed by its messages
  foreach my $line_no (sort {$a <=> $b} keys %$errata) {
    push @out, ['=item', {'start_line' => $magic_line}, "Around line $line_no:"];
    foreach my $message (@{ $errata->{$line_no} }) {
      push @out, ['~Para', {'start_line' => $magic_line, '~cooked' => 1}, $message];
    }
  }

  # TODO: report of unknown entities? unrenderable characters?

  push @out, ['=back', {'start_line' => $magic_line, 'errata' => 1}, ''];

  DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n";

  return @out;
}

}
476
477#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
478
479##############################################################################
480##
481## stop reading now stop reading now stop reading now stop reading now stop
482##
483## HERE IT BECOMES REALLY SCARY
484##
485## stop reading now stop reading now stop reading now stop reading now stop
486##
487##############################################################################
488
# Process as much of the paragraph buffer ($self->{'paras'}) as can be
# decided without further lookahead, firing element events as it goes.
#
# Each paragraph is shifted off the buffer and dispatched by type:
#   =for / =begin / =end / ~end   -> _ponder_for / _begin / _end / _doc_end
#   =pod / =over / =back          -> _ponder_pod / _over / _back
#   =item                         -> validated against the nearest enclosing
#                                    =over on the $self->{'curr_open'} stack
#   =extend, =encoding            -> handled and skipped
#   ~Verbatim, ~Para, Data, and directives listed in
#   $self->{'accept_directives'}  -> _ponder_Plain / _Verbatim / _Data,
#                                    then _traverse_treelet_bit
# Paragraphs inside a region flagged '~ignore' are dropped.  Processing stops
# early while the only buffered paragraph is an =over, ~Verbatim or =item,
# because those need to see what follows.
#
# Takes only $self; returns nothing.
sub _ponder_paragraph_buffer {

  # Para-token types as found in the buffer.
  #   ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
  #   =over, =back, =item
  #   and the null =pod (to be complained about if over one line)
  #
  # "~data" paragraphs are something we generate at this level, depending on
  # a currently open =over region

  # Events fired:  Begin and end for:
  #                   directivename (like head1 .. head4), item, extend,
  #                   for (from =begin...=end, =for),
  #                   over-bullet, over-number, over-text, over-block,
  #                   item-bullet, item-number, item-text,
  #                   Document,
  #                   Data, Para, Verbatim
  #                   B, C, longdirname (TODO -- wha?), etc. for all directives
  #

  my $self = $_[0];
  my $paras;
  return unless @{$paras = $self->{'paras'}};
  my $curr_open = ($self->{'curr_open'} ||= []);

  my $scratch;

  DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n";

  # We have something in our buffer.  So apparently the document has started.
  unless($self->{'doc_has_started'}) {
    $self->{'doc_has_started'} = 1;

    my $starting_contentless;
    $starting_contentless =
     (
       !@$curr_open
       and @$paras and ! grep $_->[0] ne '~end', @$paras
        # i.e., if the paras is all ~ends
     )
    ;
    DEBUG and print "# Starting ",
      $starting_contentless ? 'contentless' : 'contentful',
      " document\n"
    ;

    $self->_handle_element_start(
      ($scratch = 'Document'),
      {
        'start_line' => $paras->[0][1]{'start_line'},
        $starting_contentless ? ( 'contentless' => 1 ) : (),
      },
    );
  }

  my($para, $para_type);
  while(@$paras) {
    last if @$paras == 1 and
      ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
        or $paras->[0][0] eq '=item' )
    ;
    # Those're the three kinds of paragraphs that require lookahead.
    #   Actually, an "=item Foo" inside an <over type=text> region
    #   and any =item inside an <over type=block> region (rare)
    #   don't require any lookahead, but all others (bullets
    #   and numbers) do.

# TODO: whinge about many kinds of directives in non-resolving =for regions?
# TODO: many?  like what?  =head1 etc?

    $para = shift @$paras;
    $para_type = $para->[0];

    DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (",
      $self->_dump_curr_open(), ")\n";

    if($para_type eq '=for') {
      next if $self->_ponder_for($para,$curr_open,$paras);

    } elsif($para_type eq '=begin') {
      next if $self->_ponder_begin($para,$curr_open,$paras);

    } elsif($para_type eq '=end') {
      next if $self->_ponder_end($para,$curr_open,$paras);

    } elsif($para_type eq '~end') { # The virtual end-document signal
      next if $self->_ponder_doc_end($para,$curr_open,$paras);
    }


    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    if(grep $_->[1]{'~ignore'}, @$curr_open) {
      DEBUG > 1 and
       print "Skipping $para_type paragraph because in ignore mode.\n";
      next;
    }
    #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~

    if($para_type eq '=pod') {
      $self->_ponder_pod($para,$curr_open,$paras);

    } elsif($para_type eq '=over') {
      next if $self->_ponder_over($para,$curr_open,$paras);

    } elsif($para_type eq '=back') {
      next if $self->_ponder_back($para,$curr_open,$paras);

    } else {

      # All non-magical codes!!!

      # Here we start using $para_type for our own twisted purposes, to
      #  mean how it should get treated, not as what the element name
      #  should be.

      DEBUG > 1 and print "Pondering non-magical $para_type\n";

      # Enforce some =headN discipline
      if($para_type =~ m/^=head\d$/s
         and ! $self->{'accept_heads_anywhere'}
         and @$curr_open
         and $curr_open->[-1][0] eq '=over'
      ) {
        DEBUG > 2 and print "'$para_type' inside an '=over'!\n";
        $self->whine(
          $para->[1]{'start_line'},
          "You forgot a '=back' before '$para_type'"
        );
        unshift @$paras, ['=back', {}, ''], $para;   # close the =over
        next;
      }


      if($para_type eq '=item') {

        # The nearest enclosing =over; it need not be the top of the stack,
        # since a =for region may have been opened inside it.
        my $over;
        unless(@$curr_open and
               $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
          $self->whine(
            $para->[1]{'start_line'},
            "'=item' outside of any '=over'"
          );
          unshift @$paras,
            ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
            $para
          ;
          next;
        }


        my $over_type = $over->[1]{'~type'};

        if(!$over_type) {
          # Shouldn't happen!
          die "Typeless over in stack, starting at line "
           . $over->[1]{'start_line'};

        } elsif($over_type eq 'block') {
          # Per-list bookkeeping lives on $over, the element whose '~type'
          # selected this branch, not on whatever is on top of the stack.
          unless($over->[1]{'~bitched_about'}) {
            $over->[1]{'~bitched_about'} = 1;
            $self->whine(
              $over->[1]{'start_line'},
              "You can't have =items (as at line "
              . $para->[1]{'start_line'}
              . ") unless the first thing after the =over is an =item"
            );
          }
          # Just turn it into a paragraph and reconsider it
          $para->[0] = '~Para';
          unshift @$paras, $para;
          next;

        } elsif($over_type eq 'text') {
          my $item_type = $self->_get_item_type($para);
            # That kills the content of the item if it's a number or bullet.
          DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";

          if($item_type eq 'text') {
            # Nothing special needs doing for 'text'
          } elsif($item_type eq 'number' or $item_type eq 'bullet') {
            # Undo our clobbering:
            push @$para, $para->[1]{'~orig_content'};
            delete $para->[1]{'number'};
             # Only a PROPER item-number element is allowed
             #  to have a number attribute.
          } else {
            die "Unhandled item type $item_type"; # should never happen
          }

          # =item-text thingies don't need any assimilation, it seems.

        } elsif($over_type eq 'number') {
          my $item_type = $self->_get_item_type($para);
            # That kills the content of the item if it's a number or bullet.
          DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";

          # The running item count belongs to the list itself.
          my $expected_value = ++ $over->[1]{'~counter'};

          if($item_type eq 'bullet') {
            # Hm, it's not numeric.  Correct for this.
            $para->[1]{'number'} = $expected_value;
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item $expected_value'"
            );
            push @$para, $para->[1]{'~orig_content'};
              # restore the bullet, blocking the assimilation of next para

          } elsif($item_type eq 'text') {
            # Hm, it's not numeric.  Correct for this.
            $para->[1]{'number'} = $expected_value;
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item $expected_value'"
            );
            # Text content will still be there and will block next ~Para

          } elsif($item_type ne 'number') {
            die "Unknown item type $item_type"; # should never happen

          } elsif($expected_value == $para->[1]{'number'}) {
            DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";

          } else {
            DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
             " instead of the expected value of $expected_value\n";
            $self->whine(
              $para->[1]{'start_line'},
              "You have '=item " . $para->[1]{'number'} .
              "' instead of the expected '=item $expected_value'"
            );
            $para->[1]{'number'} = $expected_value;  # correcting!!
          }

          if(@$para == 2) {
            # For the cases where we /didn't/ push to @$para
            if($paras->[0][0] eq '~Para') {
              DEBUG and print "Assimilating following ~Para content into $over_type item\n";
              push @$para, splice @{shift @$paras},2;
            } else {
              DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
              push @$para, '';  # Just so it's not contentless
            }
          }


        } elsif($over_type eq 'bullet') {
          my $item_type = $self->_get_item_type($para);
            # That kills the content of the item if it's a number or bullet.
          DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";

          if($item_type eq 'bullet') {
            # as expected!

            if( $para->[1]{'~_freaky_para_hack'} ) {
              DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
              push @$para, delete $para->[1]{'~_freaky_para_hack'};
            }

          } elsif($item_type eq 'number') {
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item *'"
            );
            push @$para, $para->[1]{'~orig_content'};
             # and block assimilation of the next paragraph
            delete $para->[1]{'number'};
             # Only a PROPER item-number element is allowed
             #  to have a number attribute.
          } elsif($item_type eq 'text') {
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item *'"
            );
             # But doesn't need processing.  But it'll block assimilation
             #  of the next para.
          } else {
            die "Unhandled item type $item_type"; # should never happen
          }

          if(@$para == 2) {
            # For the cases where we /didn't/ push to @$para
            if($paras->[0][0] eq '~Para') {
              DEBUG and print "Assimilating following ~Para content into $over_type item\n";
              push @$para, splice @{shift @$paras},2;
            } else {
              DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
              push @$para, '';  # Just so it's not contentless
            }
          }

        } else {
          die "Unhandled =over type \"$over_type\"?";
          # Shouldn't happen!
        }

        $para_type = 'Plain';
        $para->[0] .= '-' . $over_type;
        # Whew.  Now fall thru and process it.


      } elsif($para_type eq '=extend') {
        # Well, might as well implement it here.
        $self->_ponder_extend($para);
        next;  # and skip
      } elsif($para_type eq '=encoding') {
        # Not actually acted on here, but we catch errors here.
        $self->_handle_encoding_second_level($para);

        next;  # and skip
      } elsif($para_type eq '~Verbatim') {
        $para->[0] = 'Verbatim';
        $para_type = '?Verbatim';
      } elsif($para_type eq '~Para') {
        $para->[0] = 'Para';
        $para_type = '?Plain';
      } elsif($para_type eq 'Data') {
        $para->[0] = 'Data';
        $para_type = '?Data';
      } elsif( $para_type =~ s/^=//s
        and defined( $para_type = $self->{'accept_directives'}{$para_type} )
      ) {
        DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n";
      } else {
        # An unknown directive!
        DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n",
         $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
        ;
        $self->whine(
          $para->[1]{'start_line'},
          "Unknown directive: $para->[0]"
        );

        # And maybe treat it as text instead of just letting it go?
        next;
      }

      # A leading '?' means the treatment depends on the enclosing =for.
      if($para_type =~ s/^\?//s) {
        if(! @$curr_open) {  # usual case
          DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n";
        } else {
          my @fors = grep $_->[0] eq '=for', @$curr_open;
          DEBUG > 1 and print "Containing fors: ",
            join(',', map $_->[1]{'target'}, @fors), "\n";

          if(! @fors) {
            DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n";

          #} elsif(grep $_->[1]{'~resolve'}, @fors) {
          #} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
          } elsif( $fors[-1][1]{'~resolve'} ) {
            # Look to the immediately containing for

            if($para_type eq 'Data') {
              DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
              $para->[0] = 'Para';
              $para_type = 'Plain';
            } else {
              DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
            }
          } else {
            DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
            $para->[0] = $para_type = 'Data';
          }
        }
      }

      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      if($para_type eq 'Plain') {
        $self->_ponder_Plain($para);
      } elsif($para_type eq 'Verbatim') {
        $self->_ponder_Verbatim($para);
      } elsif($para_type eq 'Data') {
        $self->_ponder_Data($para);
      } else {
        die "\$para type is $para_type -- how did that happen?";
        # Shouldn't happen.
      }

      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      $para->[0] =~ s/^[~=]//s;

      DEBUG and print "\n", pretty($para), "\n";

      # traverse the treelet (which might well be just one string scalar)
      $self->{'content_seen'} ||= 1;
      $self->_traverse_treelet_bit(@$para);
    }
  }

  return;
}
887
888###########################################################################
889# The sub-ponderers...
890
891
892
# Handle a "=for TARGET ..." paragraph by rewriting it as the equivalent
# "=begin TARGET" / data paragraph / "=end TARGET" triple, which is put
# back at the front of the paragraph buffer for normal processing.
#
# Arguments: ($self, $para, $curr_open, $paras)
# Always returns 1.  Nothing is queued when an enclosing region is being
# ignored, or when no target word can be found (which is whined about).
sub _ponder_for {
  my ($self,$para,$curr_open,$paras) = @_;

  if(grep $_->[1]{'~ignore'}, @$curr_open) {
    DEBUG > 1 and print "Ignoring ignorable =for\n";
    return 1;
  }

  # The target is the first word found in the content lines; it is
  # stripped (with surrounding whitespace) from the line it was found on.
  my $target;
  foreach my $idx (2 .. $#$para) {
    next unless $para->[$idx] =~ s/^\s*(\S+)\s*//s;
    $target = $1;
    last;
  }

  unless(defined $target) {
    $self->whine(
      $para->[1]{'start_line'},
      "=for without a target?"
    );
    return 1;
  }
  DEBUG > 1 and
   print "Faking out a =for $target as a =begin $target / =end $target\n";

  # Fake it out as a begin/end
  $para->[0] = 'Data';

  my $start_line = $para->[1]{'start_line'};
  my $opener = ['=begin', {'start_line' => $start_line, '~really' => '=for'}, $target];
  my $closer = ['=end',   {'start_line' => $start_line, '~really' => '=for'}, $target];

  unshift @$paras, $opener, $para, $closer;

  return 1;
}
936
# Handle a "=begin TARGET [TITLE]" paragraph: decide whether the region is
# accepted, record that on the paragraph, and push it onto the open-element
# stack.
#
# Arguments: ($self, $para, $curr_open, $paras)
# Always returns 1.
#
# TARGET may be a comma-separated list, prefixed by '!' (negate the match)
# and/or ':' (process formatting codes inside the region).  The paragraph
# is renamed to '=for' and gets these attributes:
#   target, title (only when a non-empty title was given),
#   target_matching, ~really, ~ignore, ~resolve
# A 'for' element start is fired unless this or an enclosing region is
# ignored.  A =begin with no target is whined about and otherwise skipped.
sub _ponder_begin {
  my ($self,$para,$curr_open,$paras) = @_;
  my $content = join ' ', splice @$para, 2;
  $content =~ s/^\s+//s;
  $content =~ s/\s+$//s;
  unless(length($content)) {
    $self->whine(
      $para->[1]{'start_line'},
      "=begin without a target?"
    );
    DEBUG and print "Ignoring targetless =begin\n";
    return 1;
  }

  my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/;
  # Test for presence, not truth, so that a title of "0" is kept.
  $para->[1]{'title'} = $title if defined $title and length $title;
  $para->[1]{'target'} = $target;  # without any ':'
  $content = $target; # strip off the title

  $content =~ s/^:!/!:/s;
  my $neg;  # whether this is a negation-match
  $neg = 1        if $content =~ s/^!//s;
  my $to_resolve;  # whether to process formatting codes
  $to_resolve = 1 if $content =~ s/^://s;

  my $dont_ignore; # whether this target matches us

  foreach my $target_name (
    split(',', $content, -1),
    $neg ? () : '*'
  ) {
    DEBUG > 2 and
     print " Considering whether =begin $content matches $target_name\n";
    next unless $self->{'accept_targets'}{$target_name};

    DEBUG > 2 and
     print " It DOES match the acceptable target $target_name!\n";
    $to_resolve = 1
      if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
    $dont_ignore = 1;
    $para->[1]{'target_matching'} = $target_name;
    last; # stop looking at other target names
  }

  if($neg) {
    if( $dont_ignore ) {
      $dont_ignore = '';
      delete $para->[1]{'target_matching'};
      DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
    } else {
      $dont_ignore = 1;
      $para->[1]{'target_matching'} = '!';
      DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
    }
  }

  $para->[0] = '=for';  # Just what we happen to call these, internally
  $para->[1]{'~really'} ||= '=begin';
  $para->[1]{'~ignore'}   = (! $dont_ignore) || 0;
  $para->[1]{'~resolve'}  = $to_resolve || 0;

  DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
    "ignore contents of this region\n";
  DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
    ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
  DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";

  push @$curr_open, $para;
  if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
    DEBUG > 1 and print "Ignoring ignorable =begin\n";
  } else {
    $self->{'content_seen'} ||= 1;
    $self->_handle_element_start((my $scratch='for'), $para->[1]);
  }

  return 1;
}
1014
sub _ponder_end {
  # Process an "=end TARGET" directive.
  #
  # A missing target, a multi-word target, an =end with no open =begin
  # region on top of the stack, or a target that differs from the open
  # region's target is reported through whine() and the directive is
  # ignored.  Otherwise the innermost region is popped, and the closing
  # 'for' event is fired unless some open region is being ignored.
  # Always returns 1.
  my ($self, $para, $curr_open, $paras) = @_;

  my $content = join ' ', splice @$para, 2;
  $content =~ s/^\s+//s;
  $content =~ s/\s+$//s;
  DEBUG and print "Ogling '=end $content' directive\n";

  my $line_no    = $para->[1]{'start_line'};
  my $top_is_for = ( @$curr_open and $curr_open->[-1][0] eq '=for' );

  if( !length($content) ) {
    # Suggest the target we were expecting, when we know it.
    my $hint = $top_is_for
      ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
      : '';
    $self->whine( $line_no, "'=end' without a target?" . $hint );
    DEBUG and print "Ignoring targetless =end\n";
    return 1;
  }

  if( $content !~ m/^\S+$/ ) {   # i.e., it's not exactly one word
    $self->whine(
      $line_no,
      "'=end $content' is invalid. (Stack: "
      . $self->_dump_curr_open() . ')'
    );
    DEBUG and print "Ignoring mistargetted =end $content\n";
    return 1;
  }

  if( !$top_is_for ) {
    $self->whine(
      $line_no,
      "=end $content without matching =begin. (Stack: "
      . $self->_dump_curr_open() . ')'
    );
    DEBUG and print "Ignoring mistargetted =end $content\n";
    return 1;
  }

  if( $content ne $curr_open->[-1][1]{'target'} ) {
    $self->whine(
      $line_no,
      "=end $content doesn't match =begin "
      . $curr_open->[-1][1]{'target'}
      . ". (Stack: "
      . $self->_dump_curr_open() . ')'
    );
    DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
    return 1;
  }

  # Everything checks out, so the region may be closed.
  my $ignored_regions = grep $_->[1]{'~ignore'}, @$curr_open;
  if( $ignored_regions ) {
    # The ignoring may stem from the region being closed or from an
    # enclosing one; either way no event is emitted.
    DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
  } else {
    # The open region's start_line is overwritten with the =end's line
    # (the original author also wondered what this is for).
    $curr_open->[-1][1]{'start_line'} = $line_no;

    $self->{'content_seen'} ||= 1;
    $self->_handle_element_end( my $scratch = 'for', $para->[1]);
  }

  DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
  pop @$curr_open;

  return 1;
}
1084
sub _ponder_doc_end {
  # Handle the end-of-document pseudo-paragraph ($para).
  #
  # While regions are still open, queue fake closers for them and requeue
  # the end marker.  Once the stack is empty, give the errata generator
  # one chance to queue extra paragraphs; when nothing is left to do,
  # empty the paragraph buffer and fire the closing 'Document' event.
  # Always returns 1.
  my ($self, $para, $curr_open, $paras) = @_;

  if( @$curr_open ) {   # Deal with things left open
    DEBUG and print "Stack is nonempty at end-document: (",
      $self->_dump_curr_open(), ")\n";
    DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";

    unshift @$paras, $self->_closers_for_all_curr_open;

    # Drop every queued end marker, then append exactly two: one for the
    # upcoming cycle in which errata are generated, and one to sit at the
    # end when the loop comes back around to process the errata.
    @$paras = grep $_->[0] ne '~end', @$paras;
    push @$paras, $para, $para;
    return 1;
  }

  DEBUG and print "Okay, stack is empty now.\n";

  # Try generating an errata section, but only once per document.
  if( !$self->{'~tried_gen_errata'} ) {
    $self->{'~tried_gen_errata'} = 1;
    if( my @extras = $self->_gen_errata() ) {
      unshift @$paras, @extras;
      DEBUG and print "Generated errata... relooping...\n";
      return 1;  # I.e., loop around again to process these fake-o paragraphs
    }
  }

  @$paras = ();  # Well, that's that for this paragraph buffer.
  DEBUG and print "Throwing end-document event.\n";

  $self->_handle_element_end( my $scratch = 'Document' );
  return 1;
}
1122
sub _ponder_pod {
  # Handle a "=pod" directive.
  #
  # Complains when the directive carries more than one line of content.
  # The content itself is dropped unless a 'pod_handler' callback is set,
  # in which case the callback receives the reconstructed directive line,
  # its line number and the parser.  Returns nothing.
  my ($self, $para, $curr_open, $paras) = @_;

  if( @$para > 3 ) {
    $self->whine(
      $para->[1]{'start_line'},
      "=pod directives shouldn't be over one line long! Ignoring all "
      . (@$para - 2) . " lines of content"
    );
  }

  my $pod_handler = $self->{'pod_handler'};
  if( $pod_handler ) {
    my $line_num = $para->[1]{'start_line'};
    my $text     = $para->[2];
    # imitate cut_handler output
    my $line = $text eq '' ? "=pod" : "=pod $text";
    $pod_handler->($line, $line_num, $self);
  }

  # Unlike the neighbouring handlers, this one deliberately leaves
  # 'content_seen' untouched; the original author left that as an open
  # question.
  # $self->{'content_seen'} ||= 1;

  return;
}
1144
sub _ponder_over {
  # Handle an "=over" directive.
  #
  # The list type is decided by peeking at the next queued paragraph:
  # an =item decides it via _get_initial_item_type, an immediate =back
  # gives 'empty' (or the empty list is dropped when 'parse_empty_lists'
  # is off), and anything else gives 'block'.  The =over paragraph is
  # then pushed on the open-region stack and an "over-<type>" start event
  # is fired.  Returns 1 when the directive is dropped or nothing can be
  # decided yet; returns nothing otherwise.
  my ($self, $para, $curr_open, $paras) = @_;
  return 1 unless @$paras;

  my $following = $paras->[0][0];
  my $list_type;

  if( $following eq '=item' ) {   # most common case
    $list_type = $self->_get_initial_item_type($paras->[0]);

  } elsif( $following eq '=back' ) {
    # Ignore empty lists by default
    unless( $self->{'parse_empty_lists'} ) {
      shift @$paras;
      return 1;
    }
    $list_type = 'empty';

  } elsif( $following eq '~end' ) {
    $self->whine(
      $para->[1]{'start_line'},
      "=over is the last thing in the document?!"
    );
    return 1;  # But feh, ignore it.

  } else {
    $list_type = 'block';
  }

  $para->[1]{'~type'} = $list_type;
  push @$curr_open, $para;
    # yes, we reuse the paragraph as a stack item

  my $content = join ' ', splice @$para, 2;

  if( $content =~ m/^\s*$/s ) {
    # No indent given: use the default.
    $para->[1]{'indent'} = 4;

  } elsif( $content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s ) {
    no integer;   # the indent may be fractional, so compare as a real number
    $para->[1]{'indent'} = $1;
    if( $1 == 0 ) {
      $self->whine(
        $para->[1]{'start_line'},
        "Can't have a 0 in =over $content"
      );
      $para->[1]{'indent'} = 4;
    }

  } else {
    $self->whine(
      $para->[1]{'start_line'},
      "=over should be: '=over' or '=over positive_number'"
    );
    $para->[1]{'indent'} = 4;
  }

  DEBUG > 1 and print "=over found of type $list_type\n";

  $self->{'content_seen'} ||= 1;
  $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);

  return;
}
1202
sub _ponder_back {
  # Handle a "=back" directive.
  #
  # Complains about stray parameters.  When the innermost open region is
  # an =over, it is popped and the matching "over-<type>" end event is
  # fired; the value of that call is what this sub returns.  Otherwise a
  # complaint is recorded and 1 is returned so the directive is ignored.
  my ($self, $para, $curr_open, $paras) = @_;
  # TODO: fire off </item-number> or </item-bullet> or </item-text> ??

  my $content = join ' ', splice @$para, 2;
  $self->whine(
    $para->[1]{'start_line'},
    "=back doesn't take any parameters, but you said =back $content"
  ) if $content =~ m/\S/;

  unless( @$curr_open and $curr_open->[-1][0] eq '=over' ) {
    DEBUG > 1 and print "=back found without a matching =over. Stack: (",
      join(', ', map $_->[0], @$curr_open), ").\n";
    $self->whine(
      $para->[1]{'start_line'},
      '=back without =over'
    );
    return 1;  # and ignore it
  }

  # Expected case: we're closing the most recently opened thing
  DEBUG > 1 and print "=back happily closes matching =over\n";
  $self->{'content_seen'} ||= 1;

  my $closed = pop @$curr_open;
  return $self->_handle_element_end(
    my $scratch = 'over-' . $closed->[1]{'~type'}, $para->[1]
  );
}
1233
sub _ponder_item {
  # Process an "=item" paragraph against the innermost open "=over" list.
  #
  # Returns 1 when the paragraph queue was rewritten and has to be looked
  # at again (an =item outside any list, or an =item inside a 'block'
  # list, which is demoted to a plain paragraph).  Otherwise the item is
  # normalised in place -- its name becomes "=item-<list type>" -- and
  # nothing is returned.
  #
  # All per-list bookkeeping ('~counter', '~bitched_about') lives on the
  # governing =over entry.  That entry is not necessarily the top of the
  # stack: an =begin region may be open inside the list.
  my ($self,$para,$curr_open,$paras) = @_;

  my $over;
  unless(@$curr_open and
         $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
    $self->whine(
      $para->[1]{'start_line'},
      "'=item' outside of any '=over'"
    );
    # Fake an enclosing =over and reconsider this paragraph under it.
    unshift @$paras,
      ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
      $para
    ;
    return 1;
  }

  my $over_attrs = $over->[1];
  my $over_type  = $over_attrs->{'~type'};

  if(!$over_type) {
    # Shouldn't happen!
    die "Typeless over in stack, starting at line "
     . $over_attrs->{'start_line'};

  } elsif($over_type eq 'block') {
    # Complain only once per list.
    unless($over_attrs->{'~bitched_about'}) {
      $over_attrs->{'~bitched_about'} = 1;
      $self->whine(
        $over_attrs->{'start_line'},
        "You can't have =items (as at line "
        . $para->[1]{'start_line'}
        . ") unless the first thing after the =over is an =item"
      );
    }
    # Just turn it into a paragraph and reconsider it
    $para->[0] = '~Para';
    unshift @$paras, $para;
    return 1;

  } elsif($over_type eq 'text') {
    my $item_type = $self->_get_item_type($para);
      # That kills the content of the item if it's a number or bullet.
    DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";

    if($item_type eq 'text') {
      # Nothing special needs doing for 'text'
    } elsif($item_type eq 'number' or $item_type eq 'bullet') {
      # Undo our clobbering:
      push @$para, $para->[1]{'~orig_content'};
      delete $para->[1]{'number'};
       # Only a PROPER item-number element is allowed
       #  to have a number attribute.
    } else {
      die "Unhandled item type $item_type"; # should never happen
    }

    # =item-text thingies don't need any assimilation, it seems.

  } elsif($over_type eq 'number') {
    my $item_type = $self->_get_item_type($para);
      # That kills the content of the item if it's a number or bullet.
    DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";

    my $expected_value = ++ $over_attrs->{'~counter'};

    if($item_type eq 'bullet') {
      # Hm, it's not numeric.  Correct for this.
      $para->[1]{'number'} = $expected_value;
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item $expected_value'"
      );
      push @$para, $para->[1]{'~orig_content'};
        # restore the bullet, blocking the assimilation of next para

    } elsif($item_type eq 'text') {
      # Hm, it's not numeric.  Correct for this.
      $para->[1]{'number'} = $expected_value;
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item $expected_value'"
      );
      # Text content will still be there and will block next ~Para

    } elsif($item_type ne 'number') {
      die "Unknown item type $item_type"; # should never happen

    } elsif($expected_value == $para->[1]{'number'}) {
      DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";

    } else {
      DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
       " instead of the expected value of $expected_value\n";
      $self->whine(
        $para->[1]{'start_line'},
        "You have '=item " . $para->[1]{'number'} .
        "' instead of the expected '=item $expected_value'"
      );
      $para->[1]{'number'} = $expected_value;  # correcting!!
    }

  } elsif($over_type eq 'bullet') {
    my $item_type = $self->_get_item_type($para);
      # That kills the content of the item if it's a number or bullet.
    DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";

    if($item_type eq 'bullet') {
      # as expected!

      if( $para->[1]{'~_freaky_para_hack'} ) {
        DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
        push @$para, delete $para->[1]{'~_freaky_para_hack'};
      }

    } elsif($item_type eq 'number') {
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item *'"
      );
      push @$para, $para->[1]{'~orig_content'};
       # and block assimilation of the next paragraph
      delete $para->[1]{'number'};
       # Only a PROPER item-number element is allowed
       #  to have a number attribute.
    } elsif($item_type eq 'text') {
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item *'"
      );
       # But doesn't need processing.  But it'll block assimilation
       #  of the next para.
    } else {
      die "Unhandled item type $item_type"; # should never happen
    }

  } else {
    die "Unhandled =over type \"$over_type\"?";
    # Shouldn't happen!
  }

  # Numbered and bulleted items whose own content was consumed (nothing
  # was pushed back onto @$para above) absorb the following ordinary
  # paragraph as their content.
  if( ($over_type eq 'number' or $over_type eq 'bullet') and @$para == 2 ) {
    if($paras->[0][0] eq '~Para') {
      DEBUG and print "Assimilating following ~Para content into $over_type item\n";
      push @$para, splice @{shift @$paras},2;
    } else {
      DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
      push @$para, '';  # Just so it's not contentless
    }
  }

  $para->[0] .= '-' . $over_type;

  return;
}
1402
sub _ponder_Plain {
  # Turn the raw text lines of an ordinary paragraph into a treelet of
  # formatting codes, in place.  Returns nothing.
  my ($self, $para) = @_;
  DEBUG and print " giving plain treatment...\n";

  # Empty paragraphs don't need a treelet for any reason I can see.
  my $contentless = ( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) );

  # And precooked paragraphs already have a treelet.
  return if $contentless or $para->[1]{'~cooked'};

  my $text = join("\n", splice(@$para, 2));
  push @$para, @{ $self->_make_treelet($text, $para->[1]{'start_line'}) };
  return;
}
1419
sub _ponder_Verbatim {
  # Prepare a verbatim paragraph in place: mark it whitespace-preserving,
  # optionally strip a leading indent, expand tabs, and then either run
  # the VerbatimFormatted processing, parse formatting codes, or simply
  # join the lines.  Returns nothing.
  my ($self, $para) = @_;
  DEBUG and print " giving verbatim treatment...\n";

  $para->[1]{'xml:space'} = 'preserve';

  my $indent = $self->strip_verbatim_indent;
  if ($indent && ref $indent eq 'CODE') {
    # The callback gets to see only the content lines, so the element
    # name and attribute hash are set aside for the duration of the call.
    my @head = splice @$para, 0, 2;
    $indent = $indent->($para);
    unshift @$para, @head;
  }

  # The indent is left alone when VerbatimFormatted is accepted.
  my $strip_indent = $indent
    && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});

  foreach my $line (@{$para}[2 .. $#$para]) {   # aliases the real elements
    # Strip indentation.
    $line =~ s/^\Q$indent// if $strip_indent;

    # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
    # tabs are at every EIGHTH column.  For portability, it has to be
    # one setting everywhere, and 8th wins.
    1 while $line =~
      s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e;

    # TODO: whinge about (or otherwise treat) unindented or overlong lines
  }

  # Now the VerbatimFormatted hoodoo...
  if( $self->{'accept_codes'} and
      $self->{'accept_codes'}{'VerbatimFormatted'}
  ) {
    # Kill any number of terminal newlines
    pop @$para while @$para > 3 and $para->[-1] !~ m/\S/;
    $self->_verbatim_format($para);

  } elsif ($self->{'codes_in_verbatim'}) {
    my $text = join("\n", splice(@$para, 2));
    push @$para,
      @{ $self->_make_treelet(
        $text,
        $para->[1]{'start_line'}, $para->[1]{'xml:space'}
      ) };
    $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines

  } else {
    if( @$para > 3 ) {
      my $text = join "\n", splice(@$para, 2);
      push @$para, $text;
    }
    $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
  }
  return;
}
1470
sub _ponder_Data {
  # Prepare a data paragraph in place: mark it whitespace-preserving and
  # fold multiple content lines into one newline-joined string.
  # Returns nothing.
  my ($self, $para) = @_;
  DEBUG and print " giving data treatment...\n";

  $para->[1]{'xml:space'} = 'preserve';

  if( @$para > 3 ) {
    my $joined = join "\n", splice(@$para, 2);
    push @$para, $joined;
  }
  return;
}
1478
1479
1480
1481
1482###########################################################################
1483
sub _traverse_treelet_bit {  # for use only by the routine above
  # Walk one treelet node, given flattened as ($self, name, attrs, kids...):
  # fire the start event, recurse into child nodes, hand each run of
  # consecutive text children to _handle_text as a single string, then
  # fire the end event.  Returns nothing.
  my ($self, $name, $attrs, @kids) = @_;

  # The handlers are passed a scratch copy rather than $name itself.
  my $scratch;
  $self->_handle_element_start(($scratch = $name), $attrs);

  while (@kids) {
    my $kid = shift @kids;
    if (ref $kid) {
      _traverse_treelet_bit($self, @$kid);
      next;
    }
    # Glue any directly following text children onto this one.
    $kid .= shift @kids while @kids && !ref($kids[0]);
    $self->_handle_text($kid);
  }

  $self->_handle_element_end($scratch = $name);
  return;
}
1503
1504#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1505
sub _closers_for_all_curr_open {
  # Build fake closing paragraphs (=end / =back) for every region still
  # on the parser's open-region stack.  The list is returned innermost
  # region first; nothing is returned when the parser has no stack.
  my $self = $_[0];
  my $open_regions = $self->{'curr_open'} or return;

  my @closers;
  for my $region (@$open_regions) {
    # Work on a copy, including a copy of the attribute hash, so that the
    # region still on the stack is left untouched.
    my @closer = @$region;
    $closer[1] = +{ %{ $closer[1] } };
    #$closer[1]{'start_line'} = -1;

    my $kind = $closer[0];
    $closer[0] =
        $kind eq '=for'  ? '=end'
      : $kind eq '=over' ? '=back'
      : die "I don't know how to auto-close an open $kind region";

    if( @closer <= 2 ) {
      # Supply the target as content; =over's don't have targets, so
      # those get an empty string.
      my $target = $closer[1]{'target'};
      push @closer, defined $target ? $target : '';
    }

    $closer[1]{'fake-closer'} = 1;

    DEBUG and print "Queuing up fake-o event: ", pretty(\@closer), "\n";
    unshift @closers, \@closer;
  }
  return @closers;
}
1534
1535#--------------------------------------------------------------------------
1536
sub _verbatim_format {
  # Apply "VerbatimFormatted" markup to a verbatim paragraph, in place.
  #
  #   $it - the parser (unused here)
  #   $p  - the paragraph: [ name, attrhash, line, line, ... ]
  #
  # A "formatty" line has "#:" in its first two columns and marks up the
  # line before it: "^" means bold, "/" means italic, and "%" means bold
  # italic.  Example:
  #     What do you want?  i like pie. [or whatever]
  # #:^^^^^^^^^^^^^^^^^              /////////////
  #
  # Each formatty line is removed and the line it describes is replaced
  # by text and VerbatimB / VerbatimI / VerbatimBI nodes.  The paragraph
  # is renamed 'VerbatimFormatted'.  Returns nothing.
  my($it, $p) = @_;

  for(my $i = 2; $i < @$p; $i++) { # work forwards over the lines
    DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n";
    $p->[$i] .= "\n";
     # Unlike with simple Verbatim blocks, we don't end up just doing
     # a join("\n", ...) on the contents, so we have to append a
     # newline to every line, and then nix the last one later.
  }

  if( DEBUG > 4 ) {
    print "<<\n";
    for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
      print "_verbatim_format $i: $p->[$i]";
    }
    print ">>\n";
  }

  for(my $i = $#$p; $i > 2; $i--) {
    # work backwards over the lines, except the first (#2)

    # We want a formatty line that is preceded by a nonformatty one.
    DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n";
    my ($marks) = $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s;
    unless( defined $marks ) {
      DEBUG > 5 and print " It's not a formatty line. Ignoring\n";
      next;
    }

    DEBUG > 5 and print " It's a formatty line. ",
     "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";

    if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
      DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n";
      next;
    }
    DEBUG > 5 and print " Previous line is non-formatty! Yay!\n";

    DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";

    # Stand in for the two columns taken up by the "#:" marker, so that
    # every mark lines up with the character it applies to.
    my $formatting = (' ' x 2) . $marks;
    $formatting =~ s/\s+$//s; # nix trailing whitespace
    unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
      splice @$p,$i,1; # remove this line
      $i--; # don't consider next line
      next;
    }

    # Make $formatting and the previous line be exactly the same length,
    # with $formatting having a " " as the last character.
    if( length($formatting) >= length($p->[$i-1]) ) {
      $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
    } else {
      $formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
    }

    DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n";

    # Cut the previous line into runs that share the same mark.
    my @new_line;
    while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) {
      if($2) {
        # A run of spaces: this stretch stays plain text.
        push @new_line,
          substr($p->[$i-1], pos($formatting)-length($1), length($1));
      } else {
        push @new_line, [
          (
            $3 ? 'VerbatimB'  :
            $4 ? 'VerbatimI'  :
            $5 ? 'VerbatimBI' : die("Should never get called")
          ), {},
          substr($p->[$i-1], pos($formatting)-length($1), length($1))
        ];
      }
    }
    my @nixed =
      splice @$p, $i-1, 2, @new_line; # replace this line and the one before it
    DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n";

    DEBUG > 6 and print "New version of the above line is these tokens (",
      scalar(@new_line), "):",
      map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
    $i--; # So the next line we scrutinize is the line before the one
          #  that we just went and formatted
  }

  $p->[0] = 'VerbatimFormatted';

  # Collapse adjacent text nodes, just for kicks.
  for( my $i = 2; $i < $#$p; $i++ ) { # work forwards over the tokens except for the last
    if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
      DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
      $p->[$i] .= splice @$p, $i+1, 1; # merge
      --$i;  # and back up
    }
  }

  # Now look for the last text token, and remove the terminal newline
  for( my $i = $#$p; $i >= 2; $i-- ) {
    # work backwards over the tokens, even the first
    if( !ref($p->[$i]) ) {
      if($p->[$i] =~ s/\n$//s) {
        DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
      } else {
        DEBUG > 5 and print
         "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
      }
      last; # we only want the next one
    }
  }

  return;
}
1667
1668
1669#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1670
1671
sub _treelet_from_formatting_codes {
  # Given a paragraph, returns a treelet.  Full of scary tokenizing code.
  #  Like [ '~Top', {'start_line' => $start_line},
  #         "I like ",
  #         [ 'B', {}, "pie" ],
  #         "!"
  #       ]
  #
  # Unless $preserve_space or the parser's 'preserve_whitespace' is set,
  # whitespace runs are collapsed to single spaces and the ends trimmed
  # before tokenizing.  Codes still open at the end of the paragraph are
  # reported through whine().

  my($self, $para, $start_line, $preserve_space) = @_;

  my $treelet = ['~Top', {'start_line' => $start_line},];

  unless ($preserve_space || $self->{'preserve_whitespace'}) {
    $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
    $para =~ s/ $//;
    $para =~ s/^ //;
  }

  # Only apparent problem the above code is that N<< >> turns into
  # N<< >>.  But then, word wrapping does that too!  So don't do that!

  my @stack;                  # one entry per open code: 0 for a simple
                              # code, else the number of >s needed
  my @lineage = ($treelet);   # the chain of currently open nodes
  my $raw = ''; # raw content of L<> fcode before splitting/processing
    # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
    # into just 1 ' '. Is this the regex's doing or 'raw's?
  my $inL = 0;

  # Run after each end-code: once no codes are open we are no longer
  # inside an L<>, and if the newest child of the current node is an L
  # node, it gets the raw text accumulated so far.
  my $settle_raw = sub {
    return if @stack;   # not in an L if there are no open fcodes
    $inL = 0;
    my $newest = $lineage[-1][-1];
    $newest->[1]{'raw'} = $raw if ref $newest && $newest->[0] eq 'L';
    return;
  };

  DEBUG > 4 and print "Paragraph:\n$para\n\n";

  # Here begins our frightening tokenizer RE.  The following regex matches
  # text in four main parts:
  #
  #  * Start-codes.  The first alternative matches C< or C<<, the latter
  #    followed by some whitespace.  $1 will hold the entire start code
  #    (including any space following a multiple-angle-bracket delimiter),
  #    and $2 will hold only the additional brackets past the first in a
  #    multiple-bracket delimiter.  length($2) + 1 will be the number of
  #    closing brackets we have to find.
  #
  #  * Closing brackets.  Match some amount of whitespace followed by
  #    multiple close brackets.  The logic to see if this closes anything
  #    is down below.  Note that in order to parse C<< >> correctly, we
  #    have to use look-behind (?<=\s\s), since the match of the starting
  #    code will have consumed the whitespace.
  #
  #  * A single closing bracket, to close a simple code like C<>.
  #
  #  * Something that isn't a start or end code.  We have to be careful
  #    about accepting whitespace, since perlpodspec says that any whitespace
  #    before a multiple-bracket closing delimiter should be ignored.
  #
  while($para =~
    m/\G
      (?:
        # Match starting codes, including the whitespace following a
        # multiple-delimiter start code. $1 gets the whole start code and
        # $2 gets all but one of the <s in the multiple-bracket case.
        ([A-Z]<(?:(<+)\s+)?)
        |
        # Match multiple-bracket end codes. $3 gets the whitespace that
        # should be discarded before an end bracket but kept in other cases
        # and $4 gets the end brackets themselves.
        (\s+|(?<=\s\s))(>{2,})
        |
        (\s?>) # $5: simple end-codes
        |
        ( # $6: stuff containing no start-codes or end-codes
          (?:
            [^A-Z\s>]
            |
            (?:
              [A-Z](?!<)
            )
            |
            # whitespace is ok, but we don't want to eat the whitespace before
            # a multiple-bracket end code.
            # NOTE: we may still have problems with e.g. S<< >>
            (?:
              \s(?!\s*>{2,})
            )
          )+
        )
      )
    /xgo
  ) {
    # Name the captures right away; exactly one alternative took part.
    my ($start_code, $extra_open, $end_space, $end_brackets,
        $simple_end, $stuff) = ($1, $2, $3, $4, $5, $6);

    DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n";

    if(defined $start_code) {
      my $letter = substr($start_code, 0, 1);

      if(defined $extra_open) {
        DEBUG > 3 and print "Found complex start-text code \"$start_code\"\n";
        push @stack, length($extra_open) + 1;
          # length of the necessary complex end-code string
      } else {
        DEBUG > 3 and print "Found simple start-text code \"$start_code\"\n";
        push @stack, 0;  # signal that we're looking for simple
      }

      # Hang a new node on the current one and descend into it.
      my $node = [ $letter, {}, ];
      push @{ $lineage[-1] }, $node;
      push @lineage, $node;

      if ($letter eq 'L') {
        $raw = $inL ? $raw.$start_code : ''; # reset raw content accumulator
        $inL = 1;
      } elsif ($inL) {
        $raw .= $start_code;
      }

    } elsif(defined $end_brackets) {
      DEBUG > 3 and print "Found apparent complex end-text code \"$end_space$end_brackets\"\n";
      my $found = length($end_brackets);

      # This is where it gets messy...
      if(! @stack) {
        # We saw " >>>>" but needed nothing.  This is ALL just stuff then.
        DEBUG > 4 and print " But it's really just stuff.\n";
        push @{ $lineage[-1] }, $end_space, $end_brackets;
        next;
      } elsif(!$stack[-1]) {
        # We saw " >>>>" but needed only ">".  Back pos up.
        DEBUG > 4 and print " And that's more than we needed to close simple.\n";
        push @{ $lineage[-1] }, $end_space; # That was a for-real space, too.
        pos($para) = pos($para) - $found + 1;
      } elsif($stack[-1] == $found) {
        # We found " >>>>", and it was exactly what we needed.  Commonest case.
        DEBUG > 4 and print " And that's exactly what we needed to close complex.\n";
      } elsif($stack[-1] < $found) {
        # We saw " >>>>" but needed only " >>".  Back pos up.
        DEBUG > 4 and print " And that's more than we needed to close complex.\n";
        pos($para) = pos($para) - $found + $stack[-1];
      } else {
        # We saw " >>>>" but needed " >>>>>>".  So this is all just stuff!
        DEBUG > 4 and print " But it's really just stuff, because we needed more.\n";
        push @{ $lineage[-1] }, $end_space, $end_brackets;
        next;
      }

      push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
      # Keep the element from being childless

      pop @stack;
      pop @lineage;

      $settle_raw->();
      $raw .= $end_space.$end_brackets if $inL;

    } elsif(defined $simple_end) {
      DEBUG > 3 and print "Found apparent simple end-text code \"$simple_end\"\n";

      if(@stack and ! $stack[-1]) {
        # We're indeed expecting a simple end-code
        DEBUG > 4 and print " It's indeed an end-code.\n";

        if(length($simple_end) == 2) { # There was a space there: " >"
          push @{ $lineage[-1] }, ' ';
        } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
          push @{ $lineage[-1] }, ''; # keep it from being really childless
        }

        pop @stack;
        pop @lineage;
      } else {
        DEBUG > 4 and print " It's just stuff.\n";
        push @{ $lineage[-1] }, $simple_end;
      }

      $settle_raw->();
      $raw .= $simple_end if $inL;

    } elsif(defined $stuff) {
      DEBUG > 3 and print "Found stuff \"$stuff\"\n";
      push @{ $lineage[-1] }, $stuff;
      $raw .= $stuff if $inL;
        # XXX does not capture multiplace whitespaces -- 'raw' ends up with
        #     at most 1 leading/trailing whitespace, why not all of it?

    } else {
      # should never ever ever ever happen
      DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n";
      die "SPORK 512512!";
    }
  }

  if(@stack) { # Uhoh, some sequences weren't closed.
    # Unwind from the innermost open code outwards, building a picture
    # of the unterminated nesting for the complaint.
    my $x= "...";
    while(@stack) {
      push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
      # Hmmmmm!

      my $code = (pop @lineage)->[0];
      my $ender_length = pop @stack;
      if($ender_length) {
        --$ender_length;
        $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
      } else {
        $x = $code . "<$x>";
      }
    }
    DEBUG > 1 and print "Unterminated $x sequence\n";
    $self->whine($start_line,
      "Unterminated $x sequence",
    );
  }

  return $treelet;
}
1884
1885#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1886
sub text_content_of_treelet {  # method: $parser->text_content_of_treelet($lol)
  # Method wrapper around stringify_lol(); the invocant is not used.
  my ($self, $lol) = @_;
  return stringify_lol($lol);
}
1890
sub stringify_lol {  # function: stringify_lol($lol)
  # Return the text content of a treelet as a single string.
  my ($lol) = @_;
  my $text = '';
  _stringify_lol( $lol => \$text );
  return $text;
}
1896
sub _stringify_lol {  # the real recursor
  # Append the text content of the node $lol to the string that $to
  # refers to.  Children start at index 2 (after the name and attribute
  # hash); array children are descended into, anything else is appended
  # as-is.  Returns nothing.
  my ($lol, $to) = @_;

  foreach my $kid ( @{$lol}[ 2 .. $#$lol ] ) {
    if( ref($kid || '') and UNIVERSAL::isa($kid, 'ARRAY') ) {
      _stringify_lol( $kid, $to );  # recurse!
      next;
    }
    $$to .= $kid;
  }
  return;
}
1908
1909#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1910
sub _dump_curr_open {  # return a string representation of the stack
  # Each open region is described by its directive name; '=for' entries
  # are shown as "<~really or '=over'> <target>".  Descriptions are
  # joined with '; ', and an empty stack gives '[empty]'.
  my $curr_open = $_[0]{'curr_open'};

  return '[empty]' unless @$curr_open;

  my @descriptions;
  foreach my $region (@$curr_open) {
    if( $region->[0] eq '=for' ) {
      push @descriptions,
        ($region->[1]{'~really'} || '=over') . ' ' . $region->[1]{'target'};
    } else {
      push @descriptions, $region->[0];
    }
  }
  return join '; ', @descriptions;
}
1925
1926###########################################################################
# Escape sequences used by pretty() for characters that must not appear
# raw inside a double-quoted rendering.
my %pretty_form = (
  "\a" => '\a', # ding!
  "\b" => '\b', # BS
  "\e" => '\e', # ESC
  "\f" => '\f', # FF
  "\t" => '\t', # tab
  "\cm" => '\cm',
  "\cj" => '\cj',
  "\n" => '\n', # probably overrides one of either \cm or \cj
  '"' => '\"',
  '\\' => '\\\\',
  '$' => '\\$',
  '@' => '\\@',
  '%' => '\\%',
  '#' => '\\#',
);

sub pretty { # adopted from Class::Classless
  # Render the arguments as a compact, Perl-ish, comma-separated string
  # (for debugging output).  undef becomes "undef", arrays become
  # "[ ... ]", hashes "{k=>v, ...}" with sorted keys, scalar refs "\...",
  # plain numbers are shown bare, and every other string is shown
  # double-quoted with unusual characters escaped.
  #
  # Not the most brilliant routine, but passable.
  # Don't give it a cyclic data structure!
  my @stuff = @_; # copy, because the strings get escaped in place below

  my @rendered;
  foreach my $item (@stuff) {
    if(!defined($item)) {
      push @rendered, "undef";

    } elsif(ref($item) eq 'ARRAY' or ref($item) eq 'Pod::Simple::LinkSection') {
      push @rendered, "[ " . pretty(@$item) . " ]";

    } elsif(ref($item) eq 'SCALAR') {
      push @rendered, "\\" . pretty($$item);

    } elsif(ref($item) eq 'HASH') {
      push @rendered, "{" . join(", ",
        map(pretty($_) . '=>' . pretty($item->{$_}),
            sort keys %$item ) ) . "}";

    } elsif(!length($item)) {
      push @rendered, q{''};  # empty string

    } elsif(
      $item eq '0' # very common case
      or(
         # Anchored with \A and \z so that a value such as "12\n" is not
         # mistaken for a bare number and emitted with a raw newline.
         $item =~ m/\A-?(?:[123456789]\d*|0)(?:\.\d+)?\z/s
         and $item ne '-0' # the strange case that that RE lets thru
      )
    ) {
      push @rendered, $item;

    } else {
      if( chr(65) eq 'A' ) {
        $item =~
          s/([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])/$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'/eg;
      } else {
        # We're in some crazy non-ASCII world!
        $item =~
          s/([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])/$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'/eg;
      }
      push @rendered, qq{"$item"};
    }
  }

  return join ", ", @rendered;
}
1992
1993#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
9ea6f39e
SP
1994
1995# A rather unsubtle method of blowing away all the state information
1996# from a parser object so it can be reused. Provided as a utility for
c9989a74 1997# backward compatibility in Pod::Man, etc. but not recommended for
9ea6f39e
SP
1998# general use.
1999
sub reinit {
  # Delete the listed state fields from the parser object so that it can
  # be reused; every other field is left alone.
  my $self = shift;

  my @state_fields = qw(
    source_dead source_filename doc_has_started
    start_of_pod_block content_seen last_was_blank paras curr_open
    line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen
    Title
  );

  delete $self->{$_} foreach @state_fields;
}
2010
2011#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
351625bd
SP
20121;
2013