Commit | Line | Data |
---|---|---|
351625bd SP |
1 | |
2 | package Pod::Simple::BlackBox; | |
3 | # | |
4 | # "What's in the box?" "Pain." | |
5 | # | |
6 | ########################################################################### | |
7 | # | |
8 | # This is where all the scary things happen: parsing lines into | |
9 | # paragraphs; and then into directives, verbatims, and then also | |
10 | # turning formatting sequences into treelets. | |
11 | # | |
12 | # Are you really sure you want to read this code? | |
13 | # | |
14 | #----------------------------------------------------------------------------- | |
15 | # | |
16 | # The basic work of this module Pod::Simple::BlackBox is doing the dirty work | |
17 | # of parsing Pod into treelets (generally one per non-verbatim paragraph), and | |
18 | # to call the proper callbacks on the treelets. | |
19 | # | |
20 | # Every node in a treelet is a ['name', {attrhash}, ...children...] | |
21 | ||
22 | use integer; # vroom! | |
23 | use strict; | |
24 | use Carp (); | |
a242eeb4 | 25 | use vars qw($VERSION ); |
0ace302a | 26 | $VERSION = '3.23'; |
9d65762f | 27 | #use constant DEBUG => 7; |
351625bd SP |
28 | BEGIN { |
29 | require Pod::Simple; | |
30 | *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG | |
31 | } | |
32 | ||
33 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |
34 | ||
# Singular-named alias for parse_lines: hands every argument after the
# invocant straight through and returns whatever parse_lines returns.
sub parse_line {
  my $self = shift;
  return $self->parse_lines(@_);
}
36 | ||
37 | # - - - Turn back now! Run away! - - - | |
38 | ||
sub parse_lines {             # Usage: $parser->parse_lines(@lines)
  # Feeds source lines to the parser.  Each line is classified as code,
  # a =cut, a blank line, or the start/continuation of a pod paragraph;
  # paragraphs accumulate in $self->{'paras'} and are flushed through
  # _ponder_paragraph_buffer whenever a new paragraph begins.
  #
  # Parameters: the invocant, then zero or more lines.  An undef in the
  #   list means end-of-stream.
  # Returns: $self.
  # Dies: only in the should-never-happen case of a continuation line
  #   arriving while the paragraph buffer is empty.
  my $self = shift;

  my $code_handler = $self->{'code_handler'};
  my $cut_handler  = $self->{'cut_handler'};
  my $wl_handler   = $self->{'whiteline_handler'};
  $self->{'line_count'} ||= 0;

  DEBUG > 4 and
   print "# Parsing starting at line ", $self->{'line_count'}, ".\n";

  DEBUG > 5 and
   print "# About to parse lines: ",
     join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";

  my $paras = ($self->{'paras'} ||= []);
   # paragraph buffer.  Because we need to defer processing of =over
   # directives and verbatim paragraphs.  We call _ponder_paragraph_buffer
   # to process this.

  $self->{'pod_para_count'} ||= 0;

  # Queue the virtual end-of-document paragraphs and mark the source as
  # dead, so that every later line (in this call or any later call) is
  # ignored.
  my $note_end_of_stream = sub {
    push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
    push @$paras, $paras->[-1], $paras->[-1];
     # So that it definitely fills the buffer.
    $self->{'source_dead'} = 1;
    $self->_ponder_paragraph_buffer;
    return;
  };

  my $line;
  foreach my $source_line (@_) {
    if( $self->{'source_dead'} ) {
      DEBUG > 4 and print "# Source is dead.\n";
      last;
    }

    unless( defined $source_line ) {
      DEBUG > 4 and print "# Undef-line seen.\n";
      $note_end_of_stream->();
      next;
    }


    if( $self->{'line_count'}++ ) {
      ($line = $source_line) =~ tr/\n\r//d;
       # If we don't have two vars, we'll end up with that there
       # tr/// modding the (potentially read-only) original source line!

    } else {
      # Very first line of the document: look for a byte-order mark.
      DEBUG > 2 and print "First line: [$source_line]\n";

      if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) {
        DEBUG and print "UTF-8 BOM seen. Faking a '=encoding utf8'.\n";
        $self->_handle_encoding_line( "=encoding utf8" );
        $line =~ tr/\n\r//d;

      } elsif( $line =~ s/^\xFE\xFF//s ) {
        DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n";
        $self->scream(
          $self->{'line_count'},
          "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
        );
        # End the stream right here.  Rewriting @_ while foreach is
        # iterating over it does not work: the loop's index is already
        # past the single undef that would be left, so it would never
        # be visited and the source would never be marked dead.
        $note_end_of_stream->();
        last;

        # TODO: implement somehow?

      } elsif( $line =~ s/^\xFF\xFE//s ) {
        DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n";
        $self->scream(
          $self->{'line_count'},
          "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
        );
        # As for the big-endian case above: end the stream explicitly.
        $note_end_of_stream->();
        last;

        # TODO: implement somehow?

      } else {
        DEBUG > 2 and print "First line is BOM-less.\n";
        ($line = $source_line) =~ tr/\n\r//d;
      }
    }

    # No encoding declared yet and we're being fed bytes: see whether
    # this line forces us to guess one.
    if(!$self->parse_characters && !$self->{'encoding'}) {
      $self->_try_encoding_guess($line)
    }

    DEBUG > 5 and print "# Parsing line: [$line]\n";

    if(!$self->{'in_pod'}) {
      if($line =~ m/^=([a-zA-Z]+)/s) {
        if($1 eq 'cut') {
          $self->scream(
            $self->{'line_count'},
            "=cut found outside a pod block. Skipping to next block."
          );

          ## Before there were errata sections in the world, it was
          ## least-pessimal to abort processing the file.  But now we can
          ## just barrel on thru (but still not start a pod block).
          #splice @_;
          #push @_, undef;

          next;
        } else {
          $self->{'in_pod'} = $self->{'start_of_pod_block'}
                            = $self->{'last_was_blank'}     = 1;
          # And fall thru to the pod-mode block further down
        }
      } else {
        DEBUG > 5 and print "# It's a code-line.\n";
        # NOTE(review): the "map $_" presumably hands the handler copies
        # rather than aliases of our variables -- confirm before removing.
        $code_handler->(map $_, $line, $self->{'line_count'}, $self)
         if $code_handler;
        # Note: this may cause code to be processed out of order relative
        #  to pods, but in order relative to cuts.

        # Note also that we haven't yet applied the transcoding to $line
        #  by time we call $code_handler!

        if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
          # That RE is from perlsyn, section "Plain Old Comments (Not!)",
          #$fname = $2 if defined $2;
          #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n";
          DEBUG > 1 and print "# Setting nextline to $1\n";
          $self->{'line_count'} = $1 - 1;
        }

        next;
      }
    }

    # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
    # Else we're in pod mode:

    # Apply any necessary transcoding:
    $self->{'_transcoder'} && $self->{'_transcoder'}->($line);

    # HERE WE CATCH =encoding EARLY!
    if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
      next if $self->parse_characters;   # Ignore this line
      $line = $self->_handle_encoding_line( $line );
    }

    if($line =~ m/^=cut/s) {
      # here ends the pod block, and therefore the previous pod para
      DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n";
      $self->{'in_pod'} = 0;
      # ++$self->{'pod_para_count'};
      $self->_ponder_paragraph_buffer();
       # by now it's safe to consider the previous paragraph as done.
      $cut_handler->(map $_, $line, $self->{'line_count'}, $self)
       if $cut_handler;

      # TODO: add to docs: Note: this may cause cuts to be processed out
      #  of order relative to pods, but in order relative to code.

    } elsif($line =~ m/^(\s*)$/s) {  # it's a blank line
      if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
        $wl_handler->(map $_, $line, $self->{'line_count'}, $self)
          if $wl_handler;
      }

      if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
        DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n";
        push @{$paras->[-1]}, $line;
      }  # otherwise it's not interesting

      if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
        DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n";
      }

      $self->{'last_was_blank'} = 1;

    } elsif($self->{'last_was_blank'}) {  # A non-blank line starting a new para...

      if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
        # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
        my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
         # Note that in "=head1 foo", the WS is lost.
         # Example: ['=head1', {'start_line' => 123}, ' foo']

        ++$self->{'pod_para_count'};

        $self->_ponder_paragraph_buffer();
         # by now it's safe to consider the previous paragraph as done.

        push @$paras, $new; # the new incipient paragraph
        DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";

      } elsif($line =~ m/^\s/s) {

        if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
          DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n";
          push @{$paras->[-1]}, $line;
        } else {
          ++$self->{'pod_para_count'};
          $self->_ponder_paragraph_buffer();
           # by now it's safe to consider the previous paragraph as done.
          DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n";
          push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
        }
      } else {
        ++$self->{'pod_para_count'};
        $self->_ponder_paragraph_buffer();
         # by now it's safe to consider the previous paragraph as done.
        push @$paras, ['~Para',  {'start_line' => $self->{'line_count'}}, $line];
        DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n";
      }
      $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;

    } else {
      # It's a non-blank line /continuing/ the current para
      if(@$paras) {
        DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n";
        push @{$paras->[-1]}, $line;
      } else {
        # Unexpected case!
        die "Continuing a paragraph but \@\$paras is empty?";
      }
      $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
    }

  } # ends the big foreach loop

  DEBUG > 1 and print(pretty(@$paras), "\n");
  return $self;
}
270 | ||
271 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |
272 | ||
sub _handle_encoding_line {
  # Acts on a well-formed "=encoding NAME" line by installing
  # $self->{'_transcoder'} for NAME.
  #
  # Returns: nothing when the parser is in parse_characters mode; the
  #   line unchanged when it isn't a well-formed =encoding line;
  #   otherwise the marker string '=encoding ALREADYDONE'.
  # Side effects: records the request in 'encoding_command_reqs' and its
  #   outcome (a false value on success, an error message otherwise) in
  #   'encoding_command_statuses'; may set 'encoding' and '_transcoder'.
  # Dies: if a transcoder is unexpectedly already installed, or if the
  #   freshly made transcoder fails its smoke test.
  my($self, $line) = @_;

  return if $self->parse_characters;

  my $wanted;
  if( $line =~ m/^=encoding\s+(\S+)\s*$/s ) {
    $wanted = $1;
  } else {
    return $line;
  }
  DEBUG > 1 and print "Found an encoding line \"=encoding $wanted\"\n";

  push @{ $self->{'encoding_command_reqs'} }, "=encoding $wanted";

  my $status;   # stays undef unless one of the branches below says otherwise

  # Cf.   perldoc Encode   and   perldoc Encode::Supported

  require Pod::Simple::Transcode;

  if( $self->{'encoding'} ) {
    # An encoding is already in force; a repeat of the same one (modulo
    # case, hyphens and underscores) is harmless, anything else is an error.
    (my $have = lc $self->{'encoding'}) =~ tr/-_//d;
    (my $want = lc $wanted)             =~ tr/-_//d;

    if($have eq $want) {
      DEBUG > 1 and print "The '=encoding $wanted' line is ",
       "redundant. ($have eq $want). Ignoring.\n";
      $status = '';
       # But that doesn't necessarily mean that the earlier one went okay
    } else {
      $status = "Encoding is already set to " . $self->{'encoding'};
      DEBUG > 1 and print $status;
    }

  } else {
    # OK, let's turn on the encoding.  Note that it gets recorded even
    # if it then turns out to be unsupported.
    DEBUG > 1 and print " Setting encoding to $wanted\n";
    $self->{'encoding'} = $wanted;

    if( $wanted eq 'HACKRAW' ) {
      DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n";

    } elsif( Pod::Simple::Transcode::->encoding_is_available($wanted) ) {

      die "WHAT? _transcoder is already set?!"
       if $self->{'_transcoder'};   # should never happen

      $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($wanted);

      # Smoke-test the new transcoder on some throwaway strings.
      eval {
        my @probe = ('', "abc", "123");
        $self->{'_transcoder'}->(@probe);
      };
      die "Really unexpected error setting up encoding $wanted: $@\nAborting"
       if $@;

    } else {
      my @supported = Pod::Simple::Transcode::->all_encodings;

      # Note unsupported, and complain
      DEBUG and print " Encoding [$wanted] is unsupported.",
        "\nSupporteds: @supported\n";

      # Look for a near match:
      (my $target = lc $wanted) =~ tr/-_//d;
      my $suggestion = '';
      foreach my $candidate (@supported) {
        (my $squashed = lc $candidate) =~ tr/-_//d;
        if($squashed eq $target) {
          $suggestion = " (Maybe \"$wanted\" should be \"$candidate\"?)";
          last;
        }
      }

      my $encmodver = Pod::Simple::Transcode::->encmodver;
      $status =
          "This document probably does not appear as it should, because its "
        . "\"=encoding $wanted\" line calls for an unsupported encoding."
        . $suggestion
        . " [$encmodver\'s supported encodings are: @supported]";

      $self->scream( $self->{'line_count'}, $status );
    }
  }

  push @{ $self->{'encoding_command_statuses'} }, $status;

  return '=encoding ALREADYDONE';
}
366 | ||
367 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
368 | ||
sub _handle_encoding_second_level {
  # Second look at an =encoding paragraph, once it comes out of the
  # paragraph buffer.  By the time this is called, the encoding (if well
  # formed) will already have been acted on; all that happens here is
  # reporting, via whine, either the recorded failure or a syntax error.
  #
  # Parameters: the invocant and the paragraph, ['=encoding', {attrs}, text...].
  # Returns: nothing.
  my($self, $para) = @_;

  my $content = join ' ', @{$para}[ 2 .. $#$para ];
  for ($content) {    # trim both ends
    s/^\s+//s;
    s/\s+$//s;
  }

  DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n";

  if($content ne 'ALREADYDONE') {
    # Not the marker left by the first pass, so it's a syntax error
    $self->whine( $para->[1]{'start_line'},
      "Invalid =encoding syntax: $content"
    );
    return;
  }

  # It's already been handled.  Check for errors.
  my $statuses = $self->{'encoding_command_statuses'};

  if(! $statuses ) {
    DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n";
  } elsif( $statuses->[-1] ) {
    $self->whine( $para->[1]{'start_line'},
      sprintf "Couldn't do %s: %s",
        $self->{'encoding_command_reqs'}[-1],
        $statuses->[-1],
    );
  } else {
    DEBUG > 2 and print " (Yup, it was successfully handled already.)\n";
  }

  return;
}
403 | ||
0ace302a SH |
sub _try_encoding_guess {
  # Called with a line seen before any encoding has been established.
  # If the line is pod (or looks like the start of pod) and contains a
  # non-ASCII byte, picks UTF-8 or ISO8859-1, applies that choice through
  # _handle_encoding_line, and whines about the undeclared encoding.
  #
  # Parameters: the invocant and the line (a private copy; the caller's
  #   line is not modified).
  # Returns: nothing when no guess is made; otherwise whatever whine returns.
  my ($self,$line) = @_;

  # don't whine about non-ASCII bytes in code/comments
  return unless $self->{'in_pod'} or $line =~ /^=/m;

  return if $line !~ /[^\x00-\x7f]/;  # Nothing but ASCII here

  # A lead byte directly followed by a continuation byte is taken as UTF-8.
  my $encoding = 'ISO8859-1';
  $encoding = 'UTF-8' if $line =~ /[\xC0-\xFD][\x80-\xBF]/;

  $self->_handle_encoding_line( "=encoding $encoding" );
  if( my $transcode = $self->{'_transcoder'} ) {
    $transcode->($line);
  }

  # The first whitespace-delimited word with a non-ASCII character in it
  my $word;
  $word = $1 if $line =~ /(\S*[^\x00-\x7f]\S*)/;

  return $self->whine(
    $self->{'line_count'},
    "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
  );
}
425 | ||
351625bd SP |
426 | #~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~` |
427 | ||
{
my $fake_line = -321;   # magic line number given to every generated paragraph

sub _gen_errata {
  # Return 0 or more fake-o paragraphs explaining the accumulated
  # errors on this document: a "POD ERRORS" heading and introduction,
  # then an =over/=back list with one =item per line number that has
  # errata, each followed by that line's messages as cooked paragraphs.
  my $self = $_[0];

  my $errata = $self->{'errata'};
  return unless $errata and keys %$errata;

  my @out = (
    ['=head1', {'start_line' => $fake_line, 'errata' => 1}, 'POD ERRORS'],
    ['~Para', {'start_line' => $fake_line, '~cooked' => 1, 'errata' => 1},
     "Hey! ",
     ['B', {},
      'The above document had some coding errors, which are explained below:'
     ]
    ],
    ['=over', {'start_line' => $fake_line, 'errata' => 1}, ''],
  );

  foreach my $where (sort {$a <=> $b} keys %$errata) {
    push @out, ['=item', {'start_line' => $fake_line}, "Around line $where:"];
    foreach my $message (@{ $errata->{$where} }) {
      push @out, ['~Para', {'start_line' => $fake_line, '~cooked' => 1}, $message];
    }
  }

  # TODO: report of unknown entities? unrenderable characters?

  push @out, ['=back', {'start_line' => $fake_line, 'errata' => 1}, ''];

  DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n";

  return @out;
}

}
476 | ||
477 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |
478 | ||
479 | ############################################################################## | |
480 | ## | |
481 | ## stop reading now stop reading now stop reading now stop reading now stop | |
482 | ## | |
483 | ## HERE IT BECOMES REALLY SCARY | |
484 | ## | |
485 | ## stop reading now stop reading now stop reading now stop reading now stop | |
486 | ## | |
487 | ############################################################################## | |
488 | ||
sub _ponder_paragraph_buffer {

  # Drains $self->{'paras'}, the paragraph buffer filled by parse_lines,
  # turning each buffered paragraph into element events.  Processing
  # stops early when the only paragraph left is one that needs lookahead
  # (=over, ~Verbatim, =item); it stays buffered until more input arrives.
  #
  # Parameters: the invocant only.
  # Returns: nothing.
  # Dies: on internal inconsistencies (typeless =over, unknown item or
  #   paragraph type) that should never happen.

  # Para-token types as found in the buffer.
  #   ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
  #   =over, =back, =item
  #   and the null =pod (to be complained about if over one line)
  #
  # "~data" paragraphs are something we generate at this level, depending on
  # a currently open =over region

  # Events fired:  Begin and end for:
  #                   directivename (like head1 .. head4), item, extend,
  #                   for (from =begin...=end, =for),
  #                   over-bullet, over-number, over-text, over-block,
  #                   item-bullet, item-number, item-text,
  #                   Document,
  #                   Data, Para, Verbatim
  #                   B, C, longdirname (TODO -- wha?), etc. for all directives
  #

  my $self = $_[0];
  my $paras;
  return unless @{$paras = $self->{'paras'}};
  my $curr_open = ($self->{'curr_open'} ||= []);

  my $scratch;

  DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n";

  # We have something in our buffer.  So apparently the document has started.
  unless($self->{'doc_has_started'}) {
    $self->{'doc_has_started'} = 1;

    my $starting_contentless;
    $starting_contentless =
     (
       !@$curr_open
       and @$paras and ! grep $_->[0] ne '~end', @$paras
        # i.e., if the paras is all ~ends
     )
    ;
    DEBUG and print "# Starting ",
      $starting_contentless ? 'contentless' : 'contentful',
      " document\n"
    ;

    $self->_handle_element_start(
      ($scratch = 'Document'),
      {
        'start_line' => $paras->[0][1]{'start_line'},
        $starting_contentless ? ( 'contentless' => 1 ) : (),
      },
    );
  }

  my($para, $para_type);
  while(@$paras) {
    last if @$paras == 1 and
      ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
        or $paras->[0][0] eq '=item' )
    ;
    # Those're the three kinds of paragraphs that require lookahead.
    #   Actually, an "=item Foo" inside an <over type=text> region
    #   and any =item inside an <over type=block> region (rare)
    #   don't require any lookahead, but all others (bullets
    #   and numbers) do.

# TODO: whinge about many kinds of directives in non-resolving =for regions?
# TODO: many?  like what?  =head1 etc?

    $para = shift @$paras;
    $para_type = $para->[0];

    DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (",
      $self->_dump_curr_open(), ")\n";

    # These four are handled even while in ignore mode.
    if($para_type eq '=for') {
      next if $self->_ponder_for($para,$curr_open,$paras);

    } elsif($para_type eq '=begin') {
      next if $self->_ponder_begin($para,$curr_open,$paras);

    } elsif($para_type eq '=end') {
      next if $self->_ponder_end($para,$curr_open,$paras);

    } elsif($para_type eq '~end') { # The virtual end-document signal
      next if $self->_ponder_doc_end($para,$curr_open,$paras);
    }


    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    if(grep $_->[1]{'~ignore'}, @$curr_open) {
      DEBUG > 1 and
       print "Skipping $para_type paragraph because in ignore mode.\n";
      next;
    }
    #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
    # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~

    if($para_type eq '=pod') {
      $self->_ponder_pod($para,$curr_open,$paras);

    } elsif($para_type eq '=over') {
      next if $self->_ponder_over($para,$curr_open,$paras);

    } elsif($para_type eq '=back') {
      next if $self->_ponder_back($para,$curr_open,$paras);

    } else {

      # All non-magical codes!!!

      # Here we start using $para_type for our own twisted purposes, to
      #  mean how it should get treated, not as what the element name
      #  should be.

      DEBUG > 1 and print "Pondering non-magical $para_type\n";

      # Enforce some =headN discipline
      if($para_type =~ m/^=head\d$/s
         and ! $self->{'accept_heads_anywhere'}
         and @$curr_open
         and $curr_open->[-1][0] eq '=over'
      ) {
        DEBUG > 2 and print "'$para_type' inside an '=over'!\n";
        $self->whine(
          $para->[1]{'start_line'},
          "You forgot a '=back' before '$para_type'"
        );
        unshift @$paras, ['=back', {}, ''], $para;   # close the =over
        next;
      }


      if($para_type eq '=item') {

        # The governing =over is the innermost one on the stack.  It is
        # not necessarily the top of the stack: the =item may sit inside
        # a =begin/=for region nested in the =over.
        my $over;
        unless(@$curr_open and
               $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
          $self->whine(
            $para->[1]{'start_line'},
            "'=item' outside of any '=over'"
          );
          # Supply the missing =over and reconsider the =item after it.
          unshift @$paras,
            ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
            $para
          ;
          next;
        }


        my $over_type = $over->[1]{'~type'};

        if(!$over_type) {
          # Shouldn't happen1
          die "Typeless over in stack, starting at line "
           . $over->[1]{'start_line'};

        } elsif($over_type eq 'block') {
          # Complain once per =over, and blame the =over itself rather
          # than whatever happens to be on top of the stack.
          unless($over->[1]{'~bitched_about'}) {
            $over->[1]{'~bitched_about'} = 1;
            $self->whine(
              $over->[1]{'start_line'},
              "You can't have =items (as at line "
              . $para->[1]{'start_line'}
              . ") unless the first thing after the =over is an =item"
            );
          }
          # Just turn it into a paragraph and reconsider it
          $para->[0] = '~Para';
          unshift @$paras, $para;
          next;

        } elsif($over_type eq 'text') {
          my $item_type = $self->_get_item_type($para);
            # That kills the content of the item if it's a number or bullet.
          DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";

          if($item_type eq 'text') {
            # Nothing special needs doing for 'text'
          } elsif($item_type eq 'number' or $item_type eq 'bullet') {
            # Undo our clobbering:
            push @$para, $para->[1]{'~orig_content'};
            delete $para->[1]{'number'};
             # Only a PROPER item-number element is allowed
             #  to have a number attribute.
          } else {
            die "Unhandled item type $item_type"; # should never happen
          }

          # =item-text thingies don't need any assimilation, it seems.

        } elsif($over_type eq 'number') {
          my $item_type = $self->_get_item_type($para);
            # That kills the content of the item if it's a number or bullet.
          DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";

          # The running count belongs to the =over, so that numbering
          # carries on across any region nested inside the list.
          my $expected_value = ++ $over->[1]{'~counter'};

          if($item_type eq 'bullet') {
            # Hm, it's not numeric.  Correct for this.
            $para->[1]{'number'} = $expected_value;
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item $expected_value'"
            );
            push @$para, $para->[1]{'~orig_content'};
              # restore the bullet, blocking the assimilation of next para

          } elsif($item_type eq 'text') {
            # Hm, it's not numeric.  Correct for this.
            $para->[1]{'number'} = $expected_value;
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item $expected_value'"
            );
            # Text content will still be there and will block next ~Para

          } elsif($item_type ne 'number') {
            die "Unknown item type $item_type"; # should never happen

          } elsif($expected_value == $para->[1]{'number'}) {
            DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";

          } else {
            DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
             " instead of the expected value of $expected_value\n";
            $self->whine(
              $para->[1]{'start_line'},
              "You have '=item " . $para->[1]{'number'} .
              "' instead of the expected '=item $expected_value'"
            );
            $para->[1]{'number'} = $expected_value;  # correcting!!
          }

          if(@$para == 2) {
            # For the cases where we /didn't/ push to @$para
            # (Test @$paras first: peeking at $paras->[0][0] in an empty
            # buffer would autovivify a bogus empty paragraph into it.)
            if(@$paras and $paras->[0][0] eq '~Para') {
              DEBUG and print "Assimilating following ~Para content into $over_type item\n";
              push @$para, splice @{shift @$paras},2;
            } else {
              DEBUG and print "Can't assimilate following ",
                (@$paras ? $paras->[0][0] : '(nothing)'), "\n";
              push @$para, '';  # Just so it's not contentless
            }
          }


        } elsif($over_type eq 'bullet') {
          my $item_type = $self->_get_item_type($para);
            # That kills the content of the item if it's a number or bullet.
          DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";

          if($item_type eq 'bullet') {
            # as expected!

            if( $para->[1]{'~_freaky_para_hack'} ) {
              DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
              push @$para, delete $para->[1]{'~_freaky_para_hack'};
            }

          } elsif($item_type eq 'number') {
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item *'"
            );
            push @$para, $para->[1]{'~orig_content'};
             # and block assimilation of the next paragraph
            delete $para->[1]{'number'};
             # Only a PROPER item-number element is allowed
             #  to have a number attribute.
          } elsif($item_type eq 'text') {
            $self->whine(
              $para->[1]{'start_line'},
              "Expected '=item *'"
            );
             # But doesn't need processing.  But it'll block assimilation
             #  of the next para.
          } else {
            die "Unhandled item type $item_type"; # should never happen
          }

          if(@$para == 2) {
            # For the cases where we /didn't/ push to @$para
            # (Guarded against an empty buffer, as in the number case.)
            if(@$paras and $paras->[0][0] eq '~Para') {
              DEBUG and print "Assimilating following ~Para content into $over_type item\n";
              push @$para, splice @{shift @$paras},2;
            } else {
              DEBUG and print "Can't assimilate following ",
                (@$paras ? $paras->[0][0] : '(nothing)'), "\n";
              push @$para, '';  # Just so it's not contentless
            }
          }

        } else {
          die "Unhandled =over type \"$over_type\"?";
          # Shouldn't happen!
        }

        $para_type = 'Plain';
        $para->[0] .= '-' . $over_type;
        # Whew.  Now fall thru and process it.


      } elsif($para_type eq '=extend') {
        # Well, might as well implement it here.
        $self->_ponder_extend($para);
        next;  # and skip
      } elsif($para_type eq '=encoding') {
        # Not actually acted on here, but we catch errors here.
        $self->_handle_encoding_second_level($para);

        next;  # and skip
      } elsif($para_type eq '~Verbatim') {
        $para->[0] = 'Verbatim';
        $para_type = '?Verbatim';
      } elsif($para_type eq '~Para') {
        $para->[0] = 'Para';
        $para_type = '?Plain';
      } elsif($para_type eq 'Data') {
        $para->[0] = 'Data';
        $para_type = '?Data';
      } elsif( $para_type =~ s/^=//s
        and defined( $para_type = $self->{'accept_directives'}{$para_type} )
      ) {
        DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n";
      } else {
        # An unknown directive!
        DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n",
         $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
        ;
        $self->whine(
          $para->[1]{'start_line'},
          "Unknown directive: $para->[0]"
        );

        # And maybe treat it as text instead of just letting it go?
        next;
      }

      # A leading "?" marks a tentative type, to be settled by looking
      # at the enclosing =for regions (if any).
      if($para_type =~ s/^\?//s) {
        if(! @$curr_open) {  # usual case
          DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n";
        } else {
          my @fors = grep $_->[0] eq '=for', @$curr_open;
          DEBUG > 1 and print "Containing fors: ",
            join(',', map $_->[1]{'target'}, @fors), "\n";

          if(! @fors) {
            DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n";

          #} elsif(grep $_->[1]{'~resolve'}, @fors) {
          #} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
          } elsif( $fors[-1][1]{'~resolve'} ) {
            # Look to the immediately containing for

            if($para_type eq 'Data') {
              DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
              $para->[0] = 'Para';
              $para_type = 'Plain';
            } else {
              DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
            }
          } else {
            DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
            $para->[0] = $para_type = 'Data';
          }
        }
      }

      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      if($para_type eq 'Plain') {
        $self->_ponder_Plain($para);
      } elsif($para_type eq 'Verbatim') {
        $self->_ponder_Verbatim($para);
      } elsif($para_type eq 'Data') {
        $self->_ponder_Data($para);
      } else {
        die "\$para type is $para_type -- how did that happen?";
        # Shouldn't happen.
      }

      #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      $para->[0] =~ s/^[~=]//s;

      DEBUG and print "\n", pretty($para), "\n";

      # traverse the treelet (which might well be just one string scalar)
      $self->{'content_seen'} ||= 1;
      $self->_traverse_treelet_bit(@$para);
    }
  }

  return;
}
887 | ||
888 | ########################################################################### | |
889 | # The sub-ponderers... | |
890 | ||
891 | ||
892 | ||
# Handle a "=for TARGET text..." paragraph by rewriting it as the
# equivalent triple
#
#     =begin TARGET   /   <Data paragraph>   /   =end TARGET
#
# which is put back on the FRONT of the paragraph queue.
#
#   $para      - the =for paragraph: [ name, {attrs}, content lines... ]
#   $curr_open - stack of currently open regions
#   $paras     - queue of paragraphs still to be pondered (modified in place)
#
# Always returns 1.
sub _ponder_for {
  my ($self, $para, $curr_open, $paras) = @_;

  # Nothing to do if any enclosing region is being ignored.
  if(grep $_->[1]{'~ignore'}, @$curr_open) {
    DEBUG > 1 and print "Ignoring ignorable =for\n";
    return 1;
  }

  # The target is the first run of non-whitespace in the content lines.
  # It is removed (with surrounding whitespace) from the line it was on.
  my $target;
  foreach my $idx (2 .. $#$para) {
    next unless $para->[$idx] =~ s/^\s*(\S+)\s*//s;
    $target = $1;
    last;
  }

  if(!defined $target) {
    $self->whine(
      $para->[1]{'start_line'},
      "=for without a target?"
    );
    return 1;
  }

  DEBUG > 1 and
   print "Faking out a =for $target as a =begin $target / =end $target\n";

  # What is left of the =for paragraph becomes the region's sole content.
  $para->[0] = 'Data';

  my $start_line = $para->[1]{'start_line'};
  my $opener = [
    '=begin',
    {'start_line' => $start_line, '~really' => '=for'},
    $target,
  ];
  my $closer = [
    '=end',
    {'start_line' => $start_line, '~really' => '=for'},
    $target,
  ];

  unshift @$paras, $opener, $para, $closer;

  return 1;
}
936 | ||
# Handle a "=begin TARGET [title]" paragraph: work out whether the region
# it opens is accepted by this parser, record that on the paragraph, and
# push the paragraph onto the open-region stack.
#
# TARGET may be prefixed with "!" (negated match) and/or ":" (contents
# are to have their formatting codes resolved), and may be a
# comma-separated list of names.
#
#   $para      - the =begin paragraph: [ name, {attrs}, content lines... ]
#   $curr_open - stack of currently open regions (modified in place)
#   $paras     - queue of paragraphs still to be pondered (unused here)
#
# Always returns 1.
sub _ponder_begin {
  my ($self, $para, $curr_open, $paras) = @_;

  my $content = join ' ', splice @$para, 2;
  for ($content) {
    s/^\s+//s;
    s/\s+$//s;
  }

  if(!length($content)) {
    $self->whine(
      $para->[1]{'start_line'},
      "=begin without a target?"
    );
    DEBUG and print "Ignoring targetless =begin\n";
    return 1;
  }

  # First word is the target spec; anything after it is the title.
  my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/;
  my $attrs = $para->[1];
  $attrs->{'title'}  = $title if ($title);
  $attrs->{'target'} = $target;  # without any ':'
  $content = $target;            # strip off the title

  # Normalise ":!" to "!:" and then peel the two flags off the front.
  $content =~ s/^:!/!:/s;
  my $neg;         # whether this is a negation-match
  my $to_resolve;  # whether to process formatting codes
  $neg        = 1 if $content =~ s/^!//s;
  $to_resolve = 1 if $content =~ s/^://s;

  # A non-negated region is additionally matched against the '*' target.
  my @candidates = split(',', $content, -1);
  push @candidates, '*' unless $neg;

  my $dont_ignore; # whether this target matches us
  foreach my $target_name (@candidates) {
    DEBUG > 2 and
     print " Considering whether =begin $content matches $target_name\n";
    my $accepted = $self->{'accept_targets'}{$target_name} or next;

    DEBUG > 2 and
     print " It DOES match the acceptable target $target_name!\n";
    $to_resolve = 1 if $accepted eq 'force_resolve';
    $dont_ignore = 1;
    $attrs->{'target_matching'} = $target_name;
    last;  # stop looking at other target names
  }

  # A leading "!" inverts whatever the search above concluded.
  if($neg and $dont_ignore) {
    $dont_ignore = '';
    delete $attrs->{'target_matching'};
    DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
  } elsif($neg) {
    $dont_ignore = 1;
    $attrs->{'target_matching'} = '!';
    DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
  }

  $para->[0] = '=for';  # Just what we happen to call these, internally
  $attrs->{'~really'} ||= '=begin';
  $attrs->{'~ignore'}   = (! $dont_ignore) || 0;
  $attrs->{'~resolve'}  = $to_resolve || 0;

  DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
    "ignore contents of this region\n";
  DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
    ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
  DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";

  push @$curr_open, $para;

  # Only fire a start event when neither this region nor any region
  # around it is being ignored.
  my $ignoring = !$dont_ignore || scalar grep $_->[1]{'~ignore'}, @$curr_open;
  if($ignoring) {
    DEBUG > 1 and print "Ignoring ignorable =begin\n";
  } else {
    $self->{'content_seen'} ||= 1;
    $self->_handle_element_start((my $scratch='for'), $attrs);
  }

  return 1;
}
1014 | ||
# Handle a "=end TARGET" paragraph: validate it against the innermost
# open region and, if it matches, close that region.
#
# The paragraph is complained about (via whine) and otherwise ignored when
# it has no target, has more than one word, does not follow an open
# =begin/=for region, or names a different target than that region.
#
#   $para      - the =end paragraph: [ name, {attrs}, content lines... ]
#   $curr_open - stack of currently open regions (popped on success)
#   $paras     - queue of paragraphs still to be pondered (unused here)
#
# Always returns 1.
sub _ponder_end {
  my ($self, $para, $curr_open, $paras) = @_;

  my $content = join ' ', splice @$para, 2;
  for ($content) {
    s/^\s+//s;
    s/\s+$//s;
  }
  DEBUG and print "Ogling '=end $content' directive\n";

  my $start_line = $para->[1]{'start_line'};

  # True when the innermost open region is one opened by =begin/=for.
  my $in_for = (@$curr_open and $curr_open->[-1][0] eq '=for');

  if(!length($content)) {
    my $hint = $in_for
      ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
      : '';
    $self->whine($start_line, "'=end' without a target?" . $hint);
    DEBUG and print "Ignoring targetless =end\n";
    return 1;
  }

  if($content !~ m/^\S+$/) {  # i.e., unless it's one word
    $self->whine(
      $start_line,
      "'=end $content' is invalid. (Stack: "
      . $self->_dump_curr_open() . ')'
    );
    DEBUG and print "Ignoring mistargetted =end $content\n";
    return 1;
  }

  if(!$in_for) {
    $self->whine(
      $start_line,
      "=end $content without matching =begin. (Stack: "
      . $self->_dump_curr_open() . ')'
    );
    DEBUG and print "Ignoring mistargetted =end $content\n";
    return 1;
  }

  if($content ne $curr_open->[-1][1]{'target'}) {
    $self->whine(
      $start_line,
      "=end $content doesn't match =begin "
      . $curr_open->[-1][1]{'target'}
      . ". (Stack: "
      . $self->_dump_curr_open() . ')'
    );
    DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
    return 1;
  }

  # Else it's okay to close...
  my $ignored = grep $_->[1]{'~ignore'}, @$curr_open;
  if($ignored) {
    # The ignoring may be due to the region being closed or to some
    # enclosing one; either way no event is fired.
    DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
  } else {
    # The open region's start_line is overwritten with the =end's line.
    $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};

    $self->{'content_seen'} ||= 1;
    $self->_handle_element_end( my $scratch = 'for', $para->[1]);
  }

  DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
  pop @$curr_open;

  return 1;
}
1084 | ||
# Handle the virtual end-of-document paragraph ('~end').
#
# This is visited up to three times for one document:
#   1. while regions are still open: queue synthetic closers for them and
#      re-queue the end marker;
#   2. once the stack is empty: try (once only) to generate an errata
#      section and, if there is one, queue it ahead of the end marker;
#   3. finally: empty the queue and fire the end-of-Document event.
#
#   $para      - the '~end' paragraph
#   $curr_open - stack of currently open regions
#   $paras     - queue of paragraphs still to be pondered (modified in place)
#
# Always returns 1.
sub _ponder_doc_end {
  my ($self, $para, $curr_open, $paras) = @_;

  if(@$curr_open) {  # Deal with things left open
    DEBUG and print "Stack is nonempty at end-document: (",
      $self->_dump_curr_open(), ")\n";

    DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";
    unshift @$paras, $self->_closers_for_all_curr_open;

    # Drop any end markers already queued, then append two fresh ones:
    # one for the pass that generates errata, and one to still be at the
    # end when the loop comes back around to process that errata.
    @$paras = grep { $_->[0] ne '~end' } @$paras;
    push @$paras, $para, $para;
    return 1;
  }

  DEBUG and print "Okay, stack is empty now.\n";

  # Try generating errata section, if applicable
  if(!$self->{'~tried_gen_errata'}) {
    $self->{'~tried_gen_errata'} = 1;
    my @extras = $self->_gen_errata();
    if(@extras) {
      unshift @$paras, @extras;
      DEBUG and print "Generated errata... relooping...\n";
      return 1;  # I.e., loop around again to process these fake-o paragraphs
    }
  }

  splice @$paras;  # Well, that's that for this paragraph buffer.
  DEBUG and print "Throwing end-document event.\n";

  $self->_handle_element_end( my $scratch = 'Document' );
  return 1;  # Hasta la byebye
}
1122 | ||
# Handle a "=pod" paragraph.
#
# A =pod directive carrying more than one line of content is complained
# about.  If the parser has a 'pod_handler' callback it is invoked as
#     $handler->($line, $line_number, $parser)
# where $line is "=pod" optionally followed by the first content line.
#
#   $para      - the =pod paragraph: [ name, {attrs}, content lines... ]
#   $curr_open - stack of currently open regions (unused here)
#   $paras     - queue of paragraphs still to be pondered (unused here)
#
# Returns nothing (empty list / undef).
sub _ponder_pod {
  my ($self, $para, $curr_open, $paras) = @_;

  if(@$para > 3) {
    my $extra = @$para - 2;
    $self->whine(
      $para->[1]{'start_line'},
      "=pod directives shouldn't be over one line long! Ignoring all "
      . $extra . " lines of content"
    );
  }

  # Content ignored unless 'pod_handler' is set
  my $pod_handler = $self->{'pod_handler'};
  if($pod_handler) {
    my $line_num = $para->[1]{'start_line'};
    my $line     = $para->[2];
    # imitate cut_handler output
    if($line eq '') {
      $line = "=pod";
    } else {
      $line = "=pod $line";
    }
    $pod_handler->($line, $line_num, $self);
  }

  # The surrounding methods set content_seen, so let us remain consistent.
  # I do not know why it was not here before -- should it not be here?
  # $self->{'content_seen'} ||= 1;

  return;
}
1144 | ||
# Handle an "=over [indent]" paragraph: decide what kind of list it opens
# by peeking at the next queued paragraph, push it on the open-region
# stack, and fire the start event for an 'over-TYPE' element.
#
# The list type is whatever _get_initial_item_type says when the next
# paragraph is an =item, 'empty' for an immediately closed list (only when
# 'parse_empty_lists' is set -- otherwise the =over/=back pair is dropped),
# and 'block' for anything else.
#
#   $para      - the =over paragraph: [ name, {attrs}, content lines... ]
#   $curr_open - stack of currently open regions (modified in place)
#   $paras     - queue of paragraphs still to be pondered
#
# Returns 1 when the =over was discarded, and nothing when a list was
# opened.
sub _ponder_over {
  my ($self, $para, $curr_open, $paras) = @_;
  return 1 unless @$paras;

  my $following = $paras->[0][0];
  my $list_type;

  if($following eq '=item') {  # most common case
    $list_type = $self->_get_initial_item_type($paras->[0]);

  } elsif($following eq '=back') {
    # Ignore empty lists by default
    unless($self->{'parse_empty_lists'}) {
      shift @$paras;
      return 1;
    }
    $list_type = 'empty';

  } elsif($following eq '~end') {
    $self->whine(
      $para->[1]{'start_line'},
      "=over is the last thing in the document?!"
    );
    return 1;  # But feh, ignore it.

  } else {
    $list_type = 'block';
  }

  $para->[1]{'~type'} = $list_type;
  push @$curr_open, $para;
   # yes, we reuse the paragraph as a stack item

  # Work out the indent level; anything unusable falls back to 4.
  my $content = join ' ', splice @$para, 2;
  my $indent  = 4;
  if($content =~ m/^\s*$/s) {
    # A bare "=over": keep the default.
  } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
    no integer;  # so that fractional values are not compared as 0
    $indent = $1;
    if($indent == 0) {
      $self->whine(
        $para->[1]{'start_line'},
        "Can't have a 0 in =over $content"
      );
      $indent = 4;
    }
  } else {
    $self->whine(
      $para->[1]{'start_line'},
      "=over should be: '=over' or '=over positive_number'"
    );
  }
  $para->[1]{'indent'} = $indent;

  DEBUG > 1 and print "=over found of type $list_type\n";

  $self->{'content_seen'} ||= 1;
  $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);

  return;
}
1202 | ||
# Handle a "=back" paragraph: close the innermost open region if (and
# only if) it is an =over, firing the end event for its 'over-TYPE'
# element.
#
#   $para      - the =back paragraph: [ name, {attrs}, content lines... ]
#   $curr_open - stack of currently open regions (popped on success)
#   $paras     - queue of paragraphs still to be pondered (unused here)
#
# Returns 1 when the =back had nothing to close.  Otherwise the return
# value is whatever _handle_element_end returned.
sub _ponder_back {
  my ($self, $para, $curr_open, $paras) = @_;
  # TODO: fire off </item-number> or </item-bullet> or </item-text> ??

  my $content = join ' ', splice @$para, 2;
  $self->whine(
    $para->[1]{'start_line'},
    "=back doesn't take any parameters, but you said =back $content"
  ) if $content =~ m/\S/;

  unless(@$curr_open and $curr_open->[-1][0] eq '=over') {
    DEBUG > 1 and print "=back found without a matching =over. Stack: (",
      join(', ', map $_->[0], @$curr_open), ").\n";
    $self->whine(
      $para->[1]{'start_line'},
      '=back without =over'
    );
    return 1;  # and ignore it
  }

  # Expected case: we're closing the most recently opened thing
  DEBUG > 1 and print "=back happily closes matching =over\n";
  $self->{'content_seen'} ||= 1;

  my $closed = pop @$curr_open;
  return $self->_handle_element_end(
    my $scratch = 'over-' . ( $closed->[1]{'~type'} ), $para->[1]
  );
}
1233 | ||
# Handle an "=item ..." paragraph: reconcile the item with the type of the
# list it belongs to and rename the paragraph to 'NAME-TYPE', where NAME
# is whatever the paragraph is called after _get_item_type has looked at
# it and TYPE is the list's type.
#
# The governing list is the innermost open =over.  That is not
# necessarily the innermost open *region*, because an =item can sit
# inside a =begin/=end block nested in the list.  All per-list state (the
# running item counter and the "already complained" flag) is therefore
# kept on that =over rather than on the top of the stack.
#
#   $para      - the =item paragraph: [ name, {attrs}, content lines... ]
#   $curr_open - stack of currently open regions
#   $paras     - queue of paragraphs still to be pondered (modified in place)
#
# Returns 1 when the paragraph was re-queued for another pass (no
# enclosing =over, or the =over is a 'block'), and nothing when the item
# is ready to be emitted.  Dies on states that should be impossible.
sub _ponder_item {
  my ($self, $para, $curr_open, $paras) = @_;

  my $over;
  unless(@$curr_open and
         $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
    $self->whine(
      $para->[1]{'start_line'},
      "'=item' outside of any '=over'"
    );
    # Recover by inventing an =over in front of this item.
    unshift @$paras,
      ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
      $para
    ;
    return 1;
  }

  my $over_attrs = $over->[1];
  my $over_type  = $over_attrs->{'~type'};

  # When the item ended up with no content of its own, adopt the content
  # of an immediately following ~Para; failing that, give it an empty
  # string so that it is not contentless.  The queue is checked for
  # emptiness first so that peeking at it cannot autovivify an entry.
  my $fill_if_empty = sub {
    return unless @$para == 2;
    if(@$paras and $paras->[0][0] eq '~Para') {
      DEBUG and print "Assimilating following ~Para content into $over_type item\n";
      push @$para, splice @{shift @$paras},2;
    } else {
      DEBUG and print "Can't assimilate following ",
        (@$paras ? $paras->[0][0] : '(nothing)'), "\n";
      push @$para, '';  # Just so it's not contentless
    }
    return;
  };

  if(!$over_type) {
    # Shouldn't happen1
    die "Typeless over in stack, starting at line "
     . $over_attrs->{'start_line'};

  } elsif($over_type eq 'block') {
    # Complain only once per list.
    unless($over_attrs->{'~bitched_about'}) {
      $over_attrs->{'~bitched_about'} = 1;
      $self->whine(
        $over_attrs->{'start_line'},
        "You can't have =items (as at line "
        . $para->[1]{'start_line'}
        . ") unless the first thing after the =over is an =item"
      );
    }
    # Just turn it into a paragraph and reconsider it
    $para->[0] = '~Para';
    unshift @$paras, $para;
    return 1;

  } elsif($over_type eq 'text') {
    my $item_type = $self->_get_item_type($para);
      # That kills the content of the item if it's a number or bullet.
    DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";

    if($item_type eq 'text') {
      # Nothing special needs doing for 'text'
    } elsif($item_type eq 'number' or $item_type eq 'bullet') {
      # Undo our clobbering:
      push @$para, $para->[1]{'~orig_content'};
      delete $para->[1]{'number'};
       # Only a PROPER item-number element is allowed
       #  to have a number attribute.
    } else {
      die "Unhandled item type $item_type";  # should never happen
    }

    # =item-text thingies don't need any assimilation, it seems.

  } elsif($over_type eq 'number') {
    my $item_type = $self->_get_item_type($para);
      # That kills the content of the item if it's a number or bullet.
    DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";

    my $expected_value = ++ $over_attrs->{'~counter'};

    if($item_type eq 'bullet') {
      # Hm, it's not numeric.  Correct for this.
      $para->[1]{'number'} = $expected_value;
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item $expected_value'"
      );
      push @$para, $para->[1]{'~orig_content'};
        # restore the bullet, blocking the assimilation of next para

    } elsif($item_type eq 'text') {
      # Hm, it's not numeric.  Correct for this.
      $para->[1]{'number'} = $expected_value;
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item $expected_value'"
      );
      # Text content will still be there and will block next ~Para

    } elsif($item_type ne 'number') {
      die "Unknown item type $item_type";  # should never happen

    } elsif($expected_value == $para->[1]{'number'}) {
      DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";

    } else {
      DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
       " instead of the expected value of $expected_value\n";
      $self->whine(
        $para->[1]{'start_line'},
        "You have '=item " . $para->[1]{'number'} .
        "' instead of the expected '=item $expected_value'"
      );
      $para->[1]{'number'} = $expected_value;  # correcting!!
    }

    # For the cases where we /didn't/ push to @$para
    $fill_if_empty->();

  } elsif($over_type eq 'bullet') {
    my $item_type = $self->_get_item_type($para);
      # That kills the content of the item if it's a number or bullet.
    DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";

    if($item_type eq 'bullet') {
      # as expected!

      if( $para->[1]{'~_freaky_para_hack'} ) {
        DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
        push @$para, delete $para->[1]{'~_freaky_para_hack'};
      }

    } elsif($item_type eq 'number') {
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item *'"
      );
      push @$para, $para->[1]{'~orig_content'};
       # and block assimilation of the next paragraph
      delete $para->[1]{'number'};
       # Only a PROPER item-number element is allowed
       #  to have a number attribute.
    } elsif($item_type eq 'text') {
      $self->whine(
        $para->[1]{'start_line'},
        "Expected '=item *'"
      );
       # But doesn't need processing.  But it'll block assimilation
       #  of the next para.
    } else {
      die "Unhandled item type $item_type";  # should never happen
    }

    # For the cases where we /didn't/ push to @$para
    $fill_if_empty->();

  } else {
    die "Unhandled =over type \"$over_type\"?";
    # Shouldn't happen!
  }

  $para->[0] .= '-' . $over_type;

  return;
}
1402 | ||
# Give a paragraph "plain" treatment: replace its raw content lines with
# what _make_treelet produces for them.
#
# Nothing is done for a paragraph with no content, with a single empty
# string as its content, or flagged '~cooked'.
#
#   $para - the paragraph: [ name, {attrs}, content lines... ]
#
# Returns nothing.
sub _ponder_Plain {
  my ($self, $para) = @_;
  DEBUG and print " giving plain treatment...\n";

  # Empty paragraphs don't need a treelet for any reason I can see.
  return if @$para == 2;
  return if @$para == 3 and $para->[2] eq '';

  # And precooked paragraphs already have a treelet.
  return if $para->[1]{'~cooked'};

  my $text    = join("\n", splice(@$para, 2));
  my $treelet = $self->_make_treelet($text, $para->[1]{'start_line'});
  push @$para, @$treelet;

  return;
}
1419 | ||
# Give a paragraph "verbatim" treatment: mark it whitespace-preserving,
# optionally strip a leading indent from every line, expand tabs, and
# collapse the content lines into their final form.
#
# The final form depends on the parser's settings:
#   * 'VerbatimFormatted' accepted - handed to _verbatim_format;
#   * 'codes_in_verbatim' set      - run through _make_treelet;
#   * otherwise                    - joined into one newline-separated
#                                    string.
#
#   $para - the paragraph: [ name, {attrs}, content lines... ]
#
# Returns nothing.
sub _ponder_Verbatim {
  my ($self, $para) = @_;
  DEBUG and print " giving verbatim treatment...\n";

  $para->[1]{'xml:space'} = 'preserve';

  # strip_verbatim_indent is either a literal prefix or a callback that
  # is given just the content lines and returns the prefix.
  my $indent = $self->strip_verbatim_indent;
  if ($indent && ref $indent eq 'CODE') {
    my @head = splice @{$para}, 0, 2;  # hide name and attrs from the callback
    $indent = $indent->($para);
    unshift @{$para}, @head;
  }

  my $formatted = $self->{accept_codes}
               && $self->{accept_codes}{VerbatimFormatted};

  # Indentation is left alone for VerbatimFormatted paragraphs.
  my $strip = $indent && !$formatted;

  foreach my $line (@{$para}[2 .. $#$para]) {  # aliased, so edits stick
    # Strip indentation.
    $line =~ s/^\Q$indent// if $strip;

    # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
    # tabs are at every EIGHTH column.  For portability, it has to be
    # one setting everywhere, and 8th wins.
    1 while $line =~
      s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e;

    # TODO: whinge about (or otherwise treat) unindented or overlong lines
  }

  # Now the VerbatimFormatted hoodoo...
  if($formatted) {
    # Kill any number of terminal newlines
    pop @$para while @$para > 3 and $para->[-1] !~ m/\S/;
    $self->_verbatim_format($para);

  } elsif($self->{'codes_in_verbatim'}) {
    my $text = join("\n", splice(@$para, 2));
    push @$para, @{ $self->_make_treelet(
      $text, $para->[1]{'start_line'}, $para->[1]{'xml:space'}
    ) };
    $para->[-1] =~ s/\n+$//s;  # Kill any number of terminal newlines

  } else {
    if(@$para > 3) {
      my $text = join "\n", splice(@$para, 2);
      push @$para, $text;
    }
    $para->[-1] =~ s/\n+$//s;  # Kill any number of terminal newlines
  }

  return;
}
1470 | ||
# Give a paragraph "data" treatment: mark it whitespace-preserving and
# join multiple content lines into one newline-separated string.
#
#   $para - the paragraph: [ name, {attrs}, content lines... ]
#
# Returns nothing.
sub _ponder_Data {
  my ($self, $para) = @_;
  DEBUG and print " giving data treatment...\n";

  $para->[1]{'xml:space'} = 'preserve';

  # A single content line (or none) is already in its final form.
  if(@$para > 3) {
    my @lines = splice(@$para, 2);
    push @$para, join("\n", @lines);
  }

  return;
}
1478 | ||
1479 | ||
1480 | ||
1481 | ||
1482 | ########################################################################### | |
1483 | ||
# Walk one treelet node, depth first, turning it into handler calls:
#   _handle_element_start(NAME, ATTRS), then for each child either a
#   recursive walk (child nodes) or _handle_text (strings), then
#   _handle_element_end(NAME).
#
# Called as  _traverse_treelet_bit($self, NAME, ATTRS, children...).
# Runs of adjacent string children are concatenated and reported with a
# single _handle_text call.  The handlers are given a scratch copy of the
# name rather than the node's own.
#
# Returns nothing.
sub _traverse_treelet_bit {  # for use only by the routine above
  my ($self, $name, $attrs, @children) = @_;

  my $scratch;
  $self->_handle_element_start(($scratch = $name), $attrs);

  while(@children) {
    my $child = shift @children;

    if(ref $child) {
      # Plain function call, not a method call.
      _traverse_treelet_bit($self, @$child);
      next;
    }

    # Swallow any strings that follow directly.
    while(@children and !ref $children[0]) {
      $child .= shift @children;
    }
    $self->_handle_text($child);
  }

  $self->_handle_element_end($scratch = $name);
  return;
}
1503 | ||
1504 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |
1505 | ||
# Build synthetic closing paragraphs for every region still open, so
# that a document ending with unclosed =over / =begin regions can be
# wound up cleanly.
#
# Each closer is a copy of the open region's paragraph, renamed to the
# matching closing directive ('=for' -> '=end', '=over' -> '=back'), with
# its own shallow copy of the attribute hash flagged 'fake-closer'.  A
# closer with no content is given the region's target (or '' when it has
# none, as for =over) as its content.
#
# Returns the closers innermost region first, or nothing when the parser
# has no 'curr_open' stack.  Dies on an open region of any other kind.
sub _closers_for_all_curr_open {
  my $self = $_[0];
  my $open_regions = $self->{'curr_open'} || return;

  my %closing_name_for = (
    '=for'  => '=end',
    '=over' => '=back',
  );

  my @closers;
  foreach my $still_open (@$open_regions) {
    my ($name, $orig_attrs, @content) = @$still_open;

    my %attrs = %$orig_attrs;
    #$attrs{'start_line'} = -1;

    my $closing_name = $closing_name_for{$name};
    die "I don't know how to auto-close an open $name region"
      unless defined $closing_name;

    unless(@content) {
      # since =over's don't have targets
      push @content, (defined $attrs{'target'} ? $attrs{'target'} : '');
    }

    $attrs{'fake-closer'} = 1;

    my $closer = [ $closing_name, \%attrs, @content ];
    DEBUG and print "Queuing up fake-o event: ", pretty($closer), "\n";

    # Prepending reverses the order: the innermost region closes first.
    unshift @closers, $closer;
  }

  return @closers;
}
1534 | ||
1535 | #-------------------------------------------------------------------------- | |
1536 | ||
# Turn a Verbatim paragraph into a 'VerbatimFormatted' one by applying
# "formatty" lines to the lines above them.
#
# A formatty line has "#:" in its first two columns, followed only by
# spaces and the markers "^", "/" and "%".  Each run of one marker under
# a stretch of the preceding line wraps that stretch in a VerbatimB ("^"),
# VerbatimI ("/") or VerbatimBI ("%") node.  Example:
#
#     What do you want?  i like pie. [or whatever]
#   #:^^^^^^^^^^^^^^^^^              /////////////
#
# The formatty line itself is removed.  Of several consecutive formatty
# lines only the first one (directly under a normal line) is applied.
#
#   $it - the parser (unused)
#   $p  - the paragraph: [ name, {attrs}, content lines... ]; rewritten in
#         place so that its content is a list of strings and nodes
#
# Returns nothing.
sub _verbatim_format {
  my($it, $p) = @_;

  my $formatting;

  for(my $i = 2; $i < @$p; $i++) { # work forwards over the lines
    DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n";
    $p->[$i] .= "\n";
     # Unlike with simple Verbatim blocks, we don't end up just doing
     # a join("\n", ...) on the contents, so we have to append a
     # newline to every line, and then nix the last one later.
  }

  if( DEBUG > 4 ) {
    print "<<\n";
    for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
      print "_verbatim_format $i: $p->[$i]";
    }
    print ">>\n";
  }

  for(my $i = $#$p; $i > 2; $i--) {
    # work backwards over the lines, except the first (#2)

    # We want a formatty line directly preceded by a nonformatty one.
    DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n";
    if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) {
      DEBUG > 5 and print " It's a formatty line. ",
       "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";

      if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
        DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n";
        next;
      } else {
        DEBUG > 5 and print " Previous line is non-formatty! Yay!\n";
      }
    } else {
      DEBUG > 5 and print " It's not a formatty line. Ignoring\n";
      next;
    }

    DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";

    # $1 is still the marker part captured by the match above.  The two
    # columns taken up by "#:" are replaced by two spaces so that the
    # markers line up with the text they sit under.
    $formatting = ' ' . $1;
    $formatting =~ s/\s+$//s; # nix trailing whitespace
    unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
      splice @$p,$i,1; # remove this line
      $i--; # don't consider next line
      next;
    }

    if( length($formatting) >= length($p->[$i-1]) ) {
      $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
    } else {
      $formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
    }
    # Make $formatting and the previous line be exactly the same length,
    # with $formatting having a " " as the last character.

    DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n";


    # Cut the text line up along the boundaries of the marker runs.
    my @new_line;
    while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) {
      #print "Format matches $1\n";

      if($2) {
        # A run of spaces: the text under it stays plain.
        push @new_line,
          substr($p->[$i-1], pos($formatting)-length($1), length($1));
      } else {
        # A run of markers: wrap the text under it in the matching node.
        push @new_line, [
          (
            $3 ? 'VerbatimB'  :
            $4 ? 'VerbatimI'  :
            $5 ? 'VerbatimBI' : die("Should never get called")
          ), {},
          substr($p->[$i-1], pos($formatting)-length($1), length($1))
        ];
        #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
      }
    }
    my @nixed =
      splice @$p, $i-1, 2, @new_line; # replace the text line and this line
    DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n";

    DEBUG > 6 and print "New version of the above line is these tokens (",
      scalar(@new_line), "):",
      map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
    $i--; # So the next line we scrutinize is the line before the one
          #  that we just went and formatted
  }

  $p->[0] = 'VerbatimFormatted';

  # Collapse adjacent text nodes, just for kicks.
  # The bound is "<": the loop has to stop one short of the last token
  # because each step looks at token $i+1.  $#$p is re-read on every pass
  # since a merge shortens the array.
  for( my $i = 2; $i < $#$p; $i++ ) { # work forwards over the tokens except for the last
    if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
      DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
      $p->[$i] .= splice @$p, $i+1, 1; # merge
      --$i;  # and back up
    }
  }

  # Now look for the last text token, and remove the terminal newline
  for( my $i = $#$p; $i >= 2; $i-- ) {
    # work backwards over the tokens, even the first
    if( !ref($p->[$i]) ) {
      if($p->[$i] =~ s/\n$//s) {
        DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
      } else {
        DEBUG > 5 and print
         "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
      }
      last; # we only want the next one
    }
  }

  return;
}
1667 | ||
1668 | ||
1669 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |
1670 | ||
1671 | ||
sub _treelet_from_formatting_codes {
  # Given a paragraph, returns a treelet.  Full of scary tokenizing code.
  #  Like [ '~Top', {'start_line' => $start_line},
  #         "I like ",
  #         [ 'B', {}, "pie" ],
  #         "!"
  #       ]
  #
  # Arguments:
  #   $self           - the parser; only $self->{'preserve_whitespace'} is
  #                     read, and $self->whine() is called for unterminated
  #                     sequences.
  #   $para           - the paragraph text to tokenize.
  #   $start_line     - stored as 'start_line' on the ~Top node and passed
  #                     to whine().
  #   $preserve_space - true to skip the whitespace collapsing below.
  #
  # Returns the ~Top treelet (an array ref).  An L<> node that is closed
  # while no other formatting code is open additionally gets a 'raw'
  # attribute holding the text that appeared between its delimiters.

  my($self, $para, $start_line, $preserve_space) = @_;

  my $treelet = ['~Top', {'start_line' => $start_line},];

  unless ($preserve_space || $self->{'preserve_whitespace'}) {
    $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
    $para =~ s/ $//;
    $para =~ s/^ //;
  }

  # The only apparent problem with the above code is that a run of
  # whitespace inside a code (as in N<<   >>) is collapsed to a single
  # space.  But then, word wrapping does that too!  So don't do that!

  my @stack;                # one entry per open code: 0 for a simple X<...>,
                            # otherwise the number of '>' needed to close it
  my @lineage = ($treelet); # the chain of nodes currently being filled
  my $raw = ''; # raw content of L<> fcode before splitting/processing
    # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
    # into just 1 ' '. Is this the regex's doing or 'raw's?
  my $inL = 0;  # true while an L<> code is open (cleared when @stack empties)

  DEBUG > 4 and print "Paragraph:\n$para\n\n";

  # Here begins our frightening tokenizer RE.  The following regex matches
  # text in four main parts:
  #
  #  * Start-codes.  The first alternative matches C< or C<<, the latter
  #    followed by some whitespace.  $1 will hold the entire start code
  #    (including any space following a multiple-angle-bracket delimiter),
  #    and $2 will hold only the additional brackets past the first in a
  #    multiple-bracket delimiter.  length($2) + 1 will be the number of
  #    closing brackets we have to find.
  #
  #  * Closing brackets.  Match some amount of whitespace followed by
  #    multiple close brackets.  The logic to see if this closes anything
  #    is down below.  Note that in order to parse C<<  >> correctly, we
  #    have to use look-behind (?<=\s\s), since the match of the starting
  #    code will have consumed the whitespace.
  #
  #  * A single closing bracket, to close a simple code like C<>.
  #
  #  * Something that isn't a start or end code.  We have to be careful
  #    about accepting whitespace, since perlpodspec says that any whitespace
  #    before a multiple-bracket closing delimiter should be ignored.
  #
  while($para =~
    m/\G
      (?:
        # Match starting codes, including the whitespace following a
        # multiple-delimiter start code.  $1 gets the whole start code and
        # $2 gets all but one of the <s in the multiple-bracket case.
        ([A-Z]<(?:(<+)\s+)?)
        |
        # Match multiple-bracket end codes.  $3 gets the whitespace that
        # should be discarded before an end bracket but kept in other cases
        # and $4 gets the end brackets themselves.
        (\s+|(?<=\s\s))(>{2,})
        |
        (\s?>)          # $5: simple end-codes
        |
        (               # $6: stuff containing no start-codes or end-codes
          (?:
            [^A-Z\s>]
            |
            (?:
              [A-Z](?!<)
            )
            |
            # whitespace is ok, but we don't want to eat the whitespace before
            # a multiple-bracket end code.
            # NOTE: we may still have problems with e.g. S<<  >>
            (?:
              \s(?!\s*>{2,})
            )
          )+
        )
      )
    /xgo
  ) {
    DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n";
    if(defined $1) {
      if(defined $2) {
        DEBUG > 3 and print "Found complex start-text code \"$1\"\n";
        push @stack, length($2) + 1;
          # length of the necessary complex end-code string
      } else {
        DEBUG > 3 and print "Found simple start-text code \"$1\"\n";
        push @stack, 0;  # signal that we're looking for simple
      }
      push @lineage, [ substr($1,0,1), {}, ];  # new node object
      push @{ $lineage[-2] }, $lineage[-1];
      if ('L' eq substr($1,0,1)) {
        $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator
        $inL = 1;
      } else {
        $raw .= $1 if $inL;
      }

    } elsif(defined $4) {
      my($gap, $closers) = ($3, $4);
      # How many of the matched '>' really act as the end-code.  Whenever
      # pos() is backed up below, the remaining brackets get tokenized again
      # on the next pass, so only this many may be credited to $raw here.
      my $used = length($closers);

      DEBUG > 3 and print "Found apparent complex end-text code \"$gap$closers\"\n";
      # This is where it gets messy...
      if(! @stack) {
        # We saw " >>>>" but needed nothing.  This is ALL just stuff then.
        # ($inL is always false here: it is cleared whenever @stack empties.)
        DEBUG > 4 and print " But it's really just stuff.\n";
        push @{ $lineage[-1] }, $gap, $closers;
        next;
      } elsif(!$stack[-1]) {
        # We saw " >>>>" but needed only ">".  Back pos up.
        DEBUG > 4 and print " And that's more than we needed to close simple.\n";
        push @{ $lineage[-1] }, $gap; # That was a for-real space, too.
        $used = 1;
        pos($para) = pos($para) - length($closers) + $used;
      } elsif($stack[-1] == length($closers)) {
        # We found " >>>>", and it was exactly what we needed.  Commonest case.
        DEBUG > 4 and print " And that's exactly what we needed to close complex.\n";
      } elsif($stack[-1] < length($closers)) {
        # We saw " >>>>" but needed only " >>".  Back pos up.
        DEBUG > 4 and print " And that's more than we needed to close complex.\n";
        $used = $stack[-1];
        pos($para) = pos($para) - length($closers) + $used;
      } else {
        # We saw " >>>>" but needed " >>>>>>".  So this is all just stuff!
        DEBUG > 4 and print " But it's really just stuff, because we needed more.\n";
        push @{ $lineage[-1] }, $gap, $closers;
        # It is still part of the text of any enclosing L<>, so record it
        # before moving on to the next token.
        $raw .= $gap . $closers if $inL;
        next;
      }
      #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";

      push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
      # Keep the element from being childless

      pop @stack;
      pop @lineage;

      unless (@stack) { # not in an L if there are no open fcodes
        $inL = 0;
        if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
          $lineage[-1][-1][1]{'raw'} = $raw
        }
      }
      # Credit only the consumed part of the bracket run; the rest (if any)
      # is appended when it is matched again.
      $raw .= $gap . substr($closers, 0, $used) if $inL;

    } elsif(defined $5) {
      DEBUG > 3 and print "Found apparent simple end-text code \"$5\"\n";

      if(@stack and ! $stack[-1]) {
        # We're indeed expecting a simple end-code
        DEBUG > 4 and print " It's indeed an end-code.\n";

        if(length($5) == 2) { # There was a space there: " >"
          push @{ $lineage[-1] }, ' ';
        } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
          push @{ $lineage[-1] }, ''; # keep it from being really childless
        }

        pop @stack;
        pop @lineage;
      } else {
        DEBUG > 4 and print " It's just stuff.\n";
        push @{ $lineage[-1] }, $5;
      }

      unless (@stack) { # not in an L if there are no open fcodes
        $inL = 0;
        if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
          $lineage[-1][-1][1]{'raw'} = $raw
        }
      }
      $raw .= $5 if $inL;

    } elsif(defined $6) {
      DEBUG > 3 and print "Found stuff \"$6\"\n";
      push @{ $lineage[-1] }, $6;
      $raw .= $6 if $inL;
        # XXX does not capture multiplace whitespaces -- 'raw' ends up with
        #     at most 1 leading/trailing whitespace, why not all of it?

    } else {
      # should never ever ever ever happen
      DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n";
      die "SPORK 512512!";
    }
  }

  if(@stack) { # Uhoh, some sequences weren't closed.
    # Build a picture of the still-open codes, innermost first, for the
    # warning message.
    my $x= "...";
    while(@stack) {
      push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
      # Hmmmmm!

      my $code         = (pop @lineage)->[0];
      my $ender_length =  pop @stack;
      if($ender_length) {
        # NOTE(review): @stack holds the full bracket count, so after this
        # decrement the message shows one bracket fewer than the source
        # had -- confirm whether that is intended.
        --$ender_length;
        $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
      } else {
        $x = $code . "<$x>";
      }
    }
    DEBUG > 1 and print "Unterminated $x sequence\n";
    $self->whine($start_line,
      "Unterminated $x sequence",
    );
  }

  return $treelet;
}
1884 | ||
1885 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |
1886 | ||
sub text_content_of_treelet {  # method: $parser->text_content_of_treelet($lol)
  # Method wrapper around stringify_lol(): the invocant is ignored and the
  # treelet passed as the first real argument is flattened to a string.
  my(undef, $lol) = @_;
  return stringify_lol($lol);
}
1890 | ||
sub stringify_lol {  # function: stringify_lol($lol)
  # Returns the concatenated text of a treelet (a list-of-lists node).
  # _stringify_lol() does the walking and appends into the buffer it is
  # handed by reference.
  my($lol) = @_;
  my $buffer = '';
  _stringify_lol( $lol, \$buffer );
  return $buffer;
}
1896 | ||
sub _stringify_lol {  # the real recursor
  # Appends the text content of the node $lol to the string referenced by
  # $to.  Elements 0 and 1 of a node (its name and attribute hash) are
  # skipped; array-ref children are descended into, and anything else is
  # appended as-is.
  my($lol, $to) = @_;
  foreach my $idx (2 .. $#$lol) {
    my $child = $lol->[$idx];
    if( ref($child || '') and UNIVERSAL::isa($child, 'ARRAY') ) {
      _stringify_lol( $child, $to );  # recurse!
    } else {
      $$to .= $child;
    }
  }
  return;
}
1908 | ||
1909 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |
1910 | ||
sub _dump_curr_open { # return a string representation of the stack
  # Reads $self->{'curr_open'} (an array ref of [name, {attrs}, ...] entries)
  # and returns the entry names joined with '; ', or '[empty]' when there
  # are none.  An '=for' entry is shown as its '~really' attribute (falling
  # back to '=over') followed by a space and its 'target' attribute.
  my $open_stack = $_[0]{'curr_open'};

  return '[empty]' if !@$open_stack;

  my @labels;
  foreach my $entry (@$open_stack) {
    if( $entry->[0] eq '=for' ) {
      push @labels,
        ($entry->[1]{'~really'} || '=over') . ' ' . $entry->[1]{'target'};
    } else {
      push @labels, $entry->[0];
    }
  }
  return join '; ', @labels;
}
1925 | ||
1926 | ########################################################################### | |
# Escape sequences used by pretty() for characters that must not appear
# literally inside the double-quoted strings it emits.
my %pretty_form = (
  "\a" => '\a', # ding!
  "\b" => '\b', # BS
  "\e" => '\e', # ESC
  "\f" => '\f', # FF
  "\t" => '\t', # tab
  "\cm" => '\cm',
  "\cj" => '\cj',
  "\n" => '\n', # probably overrides one of either \cm or \cj
  '"' => '\"',
  '\\' => '\\\\',
  '$' => '\\$',
  '@' => '\\@',
  '%' => '\\%',
  '#' => '\\#',
);

sub pretty { # adopted from Class::Classless
  # Not the most brilliant routine, but passable.
  # Don't give it a cyclic data structure!
  #
  # Returns one string: the Perl-ish rendering of each argument, joined
  # with ", ".
  #   undef                      -> undef
  #   array ref / LinkSection    -> [ ... ]   (contents rendered recursively)
  #   scalar ref                 -> \...
  #   hash ref                   -> {key=>value, ...}  with keys sorted
  #   empty string               -> ''
  #   plain decimal number       -> the number itself, unquoted
  #   anything else              -> a double-quoted, escaped string
  my @stuff = @_; # copy, because the escaping below edits in place
  my @rendered;

  foreach my $item (@stuff) {
    if(!defined($item)) {
      push @rendered, "undef";
      next;
    }

    my $type = ref($item);
    if($type eq 'ARRAY' or $type eq 'Pod::Simple::LinkSection') {
      push @rendered, "[ " . pretty(@$item) . " ]";
    } elsif($type eq 'SCALAR') {
      push @rendered, "\\" . pretty($$item);
    } elsif($type eq 'HASH') {
      push @rendered, "{" . join(", ",
        map(pretty($_) . '=>' . pretty($item->{$_}),
            sort keys %$item ) ) . "}";
    } elsif(!length($item)) {
      push @rendered, q{''}; # empty string
    } elsif(
      $item eq '0' # very common case
      or(
         # \A and \z rather than ^ and $: a bare $ also matches before a
         # trailing newline, which would let "12\n" through unquoted.
         $item =~ m/\A-?(?:[123456789]\d*|0)(?:\.\d+)?\z/
         and $item ne '-0' # the strange case that that RE lets thru
      )
    ) {
      push @rendered, $item;
    } else {
      if( chr(65) eq 'A' ) {
        # ASCII platform: escape everything outside a safe printable set.
        $item =~ s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
         <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
      } else {
        # We're in some crazy non-ASCII world!
        $item =~ s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])>
         <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
      }
      push @rendered, qq{"$item"};
    }
  }

  return join ", ", @rendered;
}
1992 | ||
1993 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |
9ea6f39e SP |
1994 | |
1995 | # A rather unsubtle method of blowing away all the state information | |
1996 | # from a parser object so it can be reused. Provided as a utility for | |
c9989a74 | 1997 | # backward compatibility in Pod::Man, etc. but not recommended for |
9ea6f39e SP |
1998 | # general use. |
1999 | ||
sub reinit {
  # Deletes the per-document state keys listed below from the parser hash;
  # every other key on the object is left alone.
  my $self = shift;
  my @state_keys = qw(
    source_dead source_filename doc_has_started
    start_of_pod_block content_seen last_was_blank paras curr_open
    line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen
    Title
  );

  delete $self->{$_} for @state_keys;
}
2010 | ||
2011 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | |
351625bd SP |
2012 | 1; |
2013 |