ext/Pod-LaTeX/LaTeX.pm

   1 package Pod::LaTeX;
   2
   3 =head1 NAME
   4
   5 Pod::LaTeX - Convert Pod data to formatted Latex
   6
   7 =head1 SYNOPSIS
   8
   9   use Pod::LaTeX;
  10   my $parser = Pod::LaTeX->new ( );
  11
  12   $parser->parse_from_filehandle;
  13
  14   $parser->parse_from_file ('file.pod', 'file.tex');
  15
  16 =head1 DESCRIPTION
  17
  18 C<Pod::LaTeX> is a module to convert documentation in the Pod format
  19 into Latex. The L<B<pod2latex>|pod2latex> X<pod2latex> command uses
  20 this module for translation.
  21
  22 C<Pod::LaTeX> is a derived class from L<Pod::Select|Pod::Select>.
  23
  24 =cut
  25
  26
  27 use strict;
  28 require Pod::ParseUtils;
  29 use base qw/ Pod::Select /;
  30
  31 # use Data::Dumper; # for debugging
  32 use Carp;
  33
  34 use vars qw/ $VERSION %HTML_Escapes @LatexSections /;
  35
  36 $VERSION = '0.58';
  37
  38 # Definitions of =headN -> latex mapping
  39 @LatexSections = (qw/
  40                   chapter
  41                   section
  42                   subsection
  43                   subsubsection
  44                   paragraph
  45                   subparagraph
  46                   /);
  47
  48 # Standard escape sequences converted to Latex.
  49 # The Unicode name of each character is given in the comments.
  50 # Complete LaTeX set added by Peter Acklam.
  51
  52 %HTML_Escapes = (
  53      'sol'    => '\textfractionsolidus{}',  # xxx - or should it be just '/'
  54      'verbar' => '|',
  55
  56      # The stuff below is based on the information available at
  57      # http://www.w3.org/TR/html401/sgml/entities.html
  58
  59      # All characters in the range 0xA0-0xFF of the ISO 8859-1 character set.
  60      # Several of these characters require the `textcomp' LaTeX package.
  61      'nbsp'   => q|~|,                     # 0xA0 - no-break space = non-breaking space
  62      'iexcl'  => q|\textexclamdown{}|,     # 0xA1 - inverted exclamation mark
  63      'cent'   => q|\textcent{}|,           # 0xA2 - cent sign
  64      'pound'  => q|\textsterling{}|,       # 0xA3 - pound sign
  65      'curren' => q|\textcurrency{}|,       # 0xA4 - currency sign
  66      'yen'    => q|\textyen{}|,            # 0xA5 - yen sign = yuan sign
  67      'brvbar' => q|\textbrokenbar{}|,      # 0xA6 - broken bar = broken vertical bar
  68      'sect'   => q|\textsection{}|,        # 0xA7 - section sign
  69      'uml'    => q|\textasciidieresis{}|,  # 0xA8 - diaeresis = spacing diaeresis
  70      'copy'   => q|\textcopyright{}|,      # 0xA9 - copyright sign
  71      'ordf'   => q|\textordfeminine{}|,    # 0xAA - feminine ordinal indicator
  72      'laquo'  => q|\guillemotleft{}|,      # 0xAB - left-pointing double angle quotation mark = left pointing guillemet
  73      'not'    => q|\textlnot{}|,           # 0xAC - not sign
  74      'shy'    => q|\-|,                    # 0xAD - soft hyphen = discretionary hyphen
  75      'reg'    => q|\textregistered{}|,     # 0xAE - registered sign = registered trade mark sign
  76      'macr'   => q|\textasciimacron{}|,    # 0xAF - macron = spacing macron = overline = APL overbar
  77      'deg'    => q|\textdegree{}|,         # 0xB0 - degree sign
  78      'plusmn' => q|\textpm{}|,             # 0xB1 - plus-minus sign = plus-or-minus sign
  79      'sup2'   => q|\texttwosuperior{}|,    # 0xB2 - superscript two = superscript digit two = squared
  80      'sup3'   => q|\textthreesuperior{}|,  # 0xB3 - superscript three = superscript digit three = cubed
  81      'acute'  => q|\textasciiacute{}|,     # 0xB4 - acute accent = spacing acute
  82      'micro'  => q|\textmu{}|,             # 0xB5 - micro sign
  83      'para'   => q|\textparagraph{}|,      # 0xB6 - pilcrow sign = paragraph sign
  84      'middot' => q|\textperiodcentered{}|, # 0xB7 - middle dot = Georgian comma = Greek middle dot
  85      'cedil'  => q|\c{}|,                  # 0xB8 - cedilla = spacing cedilla
  86      'sup1'   => q|\textonesuperior{}|,    # 0xB9 - superscript one = superscript digit one
  87      'ordm'   => q|\textordmasculine{}|,   # 0xBA - masculine ordinal indicator
  88      'raquo'  => q|\guillemotright{}|,     # 0xBB - right-pointing double angle quotation mark = right pointing guillemet
  89      'frac14' => q|\textonequarter{}|,     # 0xBC - vulgar fraction one quarter = fraction one quarter
  90      'frac12' => q|\textonehalf{}|,        # 0xBD - vulgar fraction one half = fraction one half
  91      'frac34' => q|\textthreequarters{}|,  # 0xBE - vulgar fraction three quarters = fraction three quarters
  92      'iquest' => q|\textquestiondown{}|,   # 0xBF - inverted question mark = turned question mark
  93      'Agrave' => q|\`A|,                   # 0xC0 - latin capital letter A with grave = latin capital letter A grave
  94      'Aacute' => q|\'A|,             # 0xC1 - latin capital letter A with acute
  95      'Acirc'  => q|\^A|,             # 0xC2 - latin capital letter A with circumflex
  96      'Atilde' => q|\~A|,             # 0xC3 - latin capital letter A with tilde
  97      'Auml'   => q|\"A|,             # 0xC4 - latin capital letter A with diaeresis
  98      'Aring'  => q|\AA{}|,           # 0xC5 - latin capital letter A with ring above = latin capital letter A ring
  99      'AElig'  => q|\AE{}|,           # 0xC6 - latin capital letter AE = latin capital ligature AE
 100      'Ccedil' => q|\c{C}|,           # 0xC7 - latin capital letter C with cedilla
 101      'Egrave' => q|\`E|,             # 0xC8 - latin capital letter E with grave
 102      'Eacute' => q|\'E|,             # 0xC9 - latin capital letter E with acute
 103      'Ecirc'  => q|\^E|,             # 0xCA - latin capital letter E with circumflex
 104      'Euml'   => q|\"E|,             # 0xCB - latin capital letter E with diaeresis
 105      'Igrave' => q|\`I|,             # 0xCC - latin capital letter I with grave
 106      'Iacute' => q|\'I|,             # 0xCD - latin capital letter I with acute
 107      'Icirc'  => q|\^I|,             # 0xCE - latin capital letter I with circumflex
 108      'Iuml'   => q|\"I|,             # 0xCF - latin capital letter I with diaeresis
 109      'ETH'    => q|\DH{}|,           # 0xD0 - latin capital letter ETH
 110      'Ntilde' => q|\~N|,             # 0xD1 - latin capital letter N with tilde
 111      'Ograve' => q|\`O|,             # 0xD2 - latin capital letter O with grave
 112      'Oacute' => q|\'O|,             # 0xD3 - latin capital letter O with acute
 113      'Ocirc'  => q|\^O|,             # 0xD4 - latin capital letter O with circumflex
 114      'Otilde' => q|\~O|,             # 0xD5 - latin capital letter O with tilde
 115      'Ouml'   => q|\"O|,             # 0xD6 - latin capital letter O with diaeresis
 116      'times'  => q|\texttimes{}|,    # 0xD7 - multiplication sign
 117      'Oslash' => q|\O{}|,            # 0xD8 - latin capital letter O with stroke = latin capital letter O slash
 118      'Ugrave' => q|\`U|,             # 0xD9 - latin capital letter U with grave
 119      'Uacute' => q|\'U|,             # 0xDA - latin capital letter U with acute
 120      'Ucirc'  => q|\^U|,             # 0xDB - latin capital letter U with circumflex
 121      'Uuml'   => q|\"U|,             # 0xDC - latin capital letter U with diaeresis
 122      'Yacute' => q|\'Y|,             # 0xDD - latin capital letter Y with acute
 123      'THORN'  => q|\TH{}|,           # 0xDE - latin capital letter THORN
 124      'szlig'  => q|\ss{}|,           # 0xDF - latin small letter sharp s = ess-zed
 125      'agrave' => q|\`a|,             # 0xE0 - latin small letter a with grave = latin small letter a grave
 126      'aacute' => q|\'a|,             # 0xE1 - latin small letter a with acute
 127      'acirc'  => q|\^a|,             # 0xE2 - latin small letter a with circumflex
 128      'atilde' => q|\~a|,             # 0xE3 - latin small letter a with tilde
 129      'auml'   => q|\"a|,             # 0xE4 - latin small letter a with diaeresis
 130      'aring'  => q|\aa{}|,           # 0xE5 - latin small letter a with ring above = latin small letter a ring
 131      'aelig'  => q|\ae{}|,           # 0xE6 - latin small letter ae = latin small ligature ae
 132      'ccedil' => q|\c{c}|,           # 0xE7 - latin small letter c with cedilla
 133      'egrave' => q|\`e|,             # 0xE8 - latin small letter e with grave
 134      'eacute' => q|\'e|,             # 0xE9 - latin small letter e with acute
 135      'ecirc'  => q|\^e|,             # 0xEA - latin small letter e with circumflex
 136      'euml'   => q|\"e|,             # 0xEB - latin small letter e with diaeresis
 137      'igrave' => q|\`i|,             # 0xEC - latin small letter i with grave
 138      'iacute' => q|\'i|,             # 0xED - latin small letter i with acute
 139      'icirc'  => q|\^i|,             # 0xEE - latin small letter i with circumflex
 140      'iuml'   => q|\"i|,             # 0xEF - latin small letter i with diaeresis
 141      'eth'    => q|\dh{}|,           # 0xF0 - latin small letter eth
 142      'ntilde' => q|\~n|,             # 0xF1 - latin small letter n with tilde
 143      'ograve' => q|\`o|,             # 0xF2 - latin small letter o with grave
 144      'oacute' => q|\'o|,             # 0xF3 - latin small letter o with acute
 145      'ocirc'  => q|\^o|,             # 0xF4 - latin small letter o with circumflex
 146      'otilde' => q|\~o|,             # 0xF5 - latin small letter o with tilde
 147      'ouml'   => q|\"o|,             # 0xF6 - latin small letter o with diaeresis
 148      'divide' => q|\textdiv{}|,      # 0xF7 - division sign
 149      'oslash' => q|\o{}|,            # 0xF8 - latin small letter o with stroke, = latin small letter o slash
 150      'ugrave' => q|\`u|,             # 0xF9 - latin small letter u with grave
 151      'uacute' => q|\'u|,             # 0xFA - latin small letter u with acute
 152      'ucirc'  => q|\^u|,             # 0xFB - latin small letter u with circumflex
 153      'uuml'   => q|\"u|,             # 0xFC - latin small letter u with diaeresis
 154      'yacute' => q|\'y|,             # 0xFD - latin small letter y with acute
 155      'thorn'  => q|\th{}|,           # 0xFE - latin small letter thorn
 156      'yuml'   => q|\"y|,             # 0xFF - latin small letter y with diaeresis
 157
 158      # Latin Extended-B
 159      'fnof'   => q|\textflorin{}|,   # latin small f with hook = function = florin
 160
 161      # Greek
 162      'Alpha'    => q|$\mathrm{A}$|,      # greek capital letter alpha
 163      'Beta'     => q|$\mathrm{B}$|,      # greek capital letter beta
 164      'Gamma'    => q|$\Gamma$|,          # greek capital letter gamma
 165      'Delta'    => q|$\Delta$|,          # greek capital letter delta
 166      'Epsilon'  => q|$\mathrm{E}$|,      # greek capital letter epsilon
 167      'Zeta'     => q|$\mathrm{Z}$|,      # greek capital letter zeta
 168      'Eta'      => q|$\mathrm{H}$|,      # greek capital letter eta
 169      'Theta'    => q|$\Theta$|,          # greek capital letter theta
 170      'Iota'     => q|$\mathrm{I}$|,      # greek capital letter iota
 171      'Kappa'    => q|$\mathrm{K}$|,      # greek capital letter kappa
 172      'Lambda'   => q|$\Lambda$|,         # greek capital letter lambda
 173      'Mu'       => q|$\mathrm{M}$|,      # greek capital letter mu
 174      'Nu'       => q|$\mathrm{N}$|,      # greek capital letter nu
 175      'Xi'       => q|$\Xi$|,             # greek capital letter xi
 176      'Omicron'  => q|$\mathrm{O}$|,      # greek capital letter omicron
 177      'Pi'       => q|$\Pi$|,             # greek capital letter pi
 178      'Rho'      => q|$\mathrm{R}$|,      # greek capital letter rho
 179      'Sigma'    => q|$\Sigma$|,          # greek capital letter sigma
 180      'Tau'      => q|$\mathrm{T}$|,      # greek capital letter tau
 181      'Upsilon'  => q|$\Upsilon$|,        # greek capital letter upsilon
 182      'Phi'      => q|$\Phi$|,            # greek capital letter phi
 183      'Chi'      => q|$\mathrm{X}$|,      # greek capital letter chi
 184      'Psi'      => q|$\Psi$|,            # greek capital letter psi
 185      'Omega'    => q|$\Omega$|,          # greek capital letter omega
 186
 187      'alpha'    => q|$\alpha$|,          # greek small letter alpha
 188      'beta'     => q|$\beta$|,           # greek small letter beta
 189      'gamma'    => q|$\gamma$|,          # greek small letter gamma
 190      'delta'    => q|$\delta$|,          # greek small letter delta
 191      'epsilon'  => q|$\epsilon$|,        # greek small letter epsilon
 192      'zeta'     => q|$\zeta$|,           # greek small letter zeta
 193      'eta'      => q|$\eta$|,            # greek small letter eta
 194      'theta'    => q|$\theta$|,          # greek small letter theta
 195      'iota'     => q|$\iota$|,           # greek small letter iota
 196      'kappa'    => q|$\kappa$|,          # greek small letter kappa
 197      'lambda'   => q|$\lambda$|,         # greek small letter lambda
 198      'mu'       => q|$\mu$|,             # greek small letter mu
 199      'nu'       => q|$\nu$|,             # greek small letter nu
 200      'xi'       => q|$\xi$|,             # greek small letter xi
 201      'omicron'  => q|$o$|,               # greek small letter omicron
 202      'pi'       => q|$\pi$|,             # greek small letter pi
 203      'rho'      => q|$\rho$|,            # greek small letter rho
 204 #    'sigmaf'   => q||,                  # greek small letter final sigma
 205      'sigma'    => q|$\sigma$|,          # greek small letter sigma
 206      'tau'      => q|$\tau$|,            # greek small letter tau
 207      'upsilon'  => q|$\upsilon$|,        # greek small letter upsilon
 208      'phi'      => q|$\phi$|,            # greek small letter phi
 209      'chi'      => q|$\chi$|,            # greek small letter chi
 210      'psi'      => q|$\psi$|,            # greek small letter psi
 211      'omega'    => q|$\omega$|,          # greek small letter omega
 212 #    'thetasym' => q||,                  # greek small letter theta symbol
 213 #    'upsih'    => q||,                  # greek upsilon with hook symbol
 214 #    'piv'      => q||,                  # greek pi symbol
 215
 216      # General Punctuation
 217      'bull'     => q|\textbullet{}|,     # bullet = black small circle
 218      # bullet is NOT the same as bullet operator
 219      'hellip'   => q|\textellipsis{}|,           # horizontal ellipsis = three dot leader
 220      'prime'    => q|\textquotesingle{}|,        # prime = minutes = feet
 221      'Prime'    => q|\textquotedbl{}|,           # double prime = seconds = inches
 222      'oline'    => q|\textasciimacron{}|,        # overline = spacing overscore
 223      'frasl'    => q|\textfractionsolidus{}|,    # fraction slash
 224
 225      # Letterlike Symbols
 226      'weierp'   => q|$\wp$|,                     # script capital P = power set = Weierstrass p
 227      'image'    => q|$\Re$|,                     # blackletter capital I = imaginary part
 228      'real'     => q|$\Im$|,                     # blackletter capital R = real part symbol
 229      'trade'    => q|\texttrademark{}|,          # trade mark sign
 230 #    'alefsym'  => q||,                          # alef symbol = first transfinite cardinal
 231      # alef symbol is NOT the same as hebrew letter alef, although the same
 232      # glyph could be used to depict both characters
 233
 234      # Arrows
 235      'larr'     => q|\textleftarrow{}|,          # leftwards arrow
 236      'uarr'     => q|\textuparrow{}|,            # upwards arrow
 237      'rarr'     => q|\textrightarrow{}|,         # rightwards arrow
 238      'darr'     => q|\textdownarrow{}|,          # downwards arrow
 239      'harr'     => q|$\leftrightarrow$|,         # left right arrow
 240 #    'crarr'    => q||,                          # downwards arrow with corner leftwards = carriage return
 241      'lArr'     => q|$\Leftarrow$|,              # leftwards double arrow
 242      # ISO 10646 does not say that lArr is the same as the 'is implied by'
 243      # arrow but also does not have any other character for that function. So
 244      # lArr can be used for 'is implied by' as ISOtech suggests
 245      'uArr'     => q|$\Uparrow$|,                # upwards double arrow
 246      'rArr'     => q|$\Rightarrow$|,             # rightwards double arrow
 247      # ISO 10646 does not say this is the 'implies' character but does not
 248      # have another character with this function so ? rArr can be used for
 249      # 'implies' as ISOtech suggests
 250      'dArr'     => q|$\Downarrow$|,              # downwards double arrow
 251      'hArr'     => q|$\Leftrightarrow$|,         # left right double arrow
 252
 253      # Mathematical Operators.
 254      # Some of these require the `amssymb' package.
 255      'forall'   => q|$\forall$|,                 # for all
 256      'part'     => q|$\partial$|,                # partial differential
 257      'exist'    => q|$\exists$|,                 # there exists
 258      'empty'    => q|$\emptyset$|,               # empty set = null set = diameter
 259      'nabla'    => q|$\nabla$|,                  # nabla = backward difference
 260      'isin'     => q|$\in$|,                     # element of
 261      'notin'    => q|$\notin$|,                  # not an element of
 262      'ni'       => q|$\ni$|,                     # contains as member
 263      'prod'     => q|$\prod$|,                   # n-ary product = product sign
 264      # prod is NOT the same character as 'greek capital letter pi' though the
 265      # same glyph might be used for both
 266      'sum'      => q|$\sum$|,                    # n-ary sumation
 267      # sum is NOT the same character as 'greek capital letter sigma' though
 268      # the same glyph might be used for both
 269      'minus'    => q|$-$|,                       # minus sign
 270      'lowast'   => q|$\ast$|,                    # asterisk operator
 271      'radic'    => q|$\surd$|,                   # square root = radical sign
 272      'prop'     => q|$\propto$|,                 # proportional to
 273      'infin'    => q|$\infty$|,                  # infinity
 274      'ang'      => q|$\angle$|,                  # angle
 275      'and'      => q|$\wedge$|,                  # logical and = wedge
 276      'or'       => q|$\vee$|,                    # logical or = vee
 277      'cap'      => q|$\cap$|,                    # intersection = cap
 278      'cup'      => q|$\cup$|,                    # union = cup
 279      'int'      => q|$\int$|,                    # integral
 280      'there4'   => q|$\therefore$|,              # therefore
 281      'sim'      => q|$\sim$|,                    # tilde operator = varies with = similar to
 282      # tilde operator is NOT the same character as the tilde
 283      'cong'     => q|$\cong$|,                   # approximately equal to
 284      'asymp'    => q|$\asymp$|,                  # almost equal to = asymptotic to
 285      'ne'       => q|$\neq$|,                    # not equal to
 286      'equiv'    => q|$\equiv$|,                  # identical to
 287      'le'       => q|$\leq$|,                    # less-than or equal to
 288      'ge'       => q|$\geq$|,                    # greater-than or equal to
 289      'sub'      => q|$\subset$|,                 # subset of
 290      'sup'      => q|$\supset$|,                 # superset of
 291      # note that nsup, 'not a superset of' is not covered by the Symbol font
 292      # encoding and is not included.
 293      'nsub'     => q|$\not\subset$|,             # not a subset of
 294      'sube'     => q|$\subseteq$|,               # subset of or equal to
 295      'supe'     => q|$\supseteq$|,               # superset of or equal to
 296      'oplus'    => q|$\oplus$|,                  # circled plus = direct sum
 297      'otimes'   => q|$\otimes$|,                 # circled times = vector product
 298      'perp'     => q|$\perp$|,                   # up tack = orthogonal to = perpendicular
 299      'sdot'     => q|$\cdot$|,                   # dot operator
 300      # dot operator is NOT the same character as middle dot
 301
 302      # Miscellaneous Technical
 303      'lceil'    => q|$\lceil$|,                  # left ceiling = apl upstile
 304      'rceil'    => q|$\rceil$|,                  # right ceiling
 305      'lfloor'   => q|$\lfloor$|,                 # left floor = apl downstile
 306      'rfloor'   => q|$\rfloor$|,                 # right floor
 307      'lang'     => q|$\langle$|,                 # left-pointing angle bracket = bra
 308      # lang is NOT the same character as 'less than' or 'single left-pointing
 309      # angle quotation mark'
 310      'rang'     => q|$\rangle$|,                 # right-pointing angle bracket = ket
 311      # rang is NOT the same character as 'greater than' or 'single
 312      # right-pointing angle quotation mark'
 313
 314      # Geometric Shapes
 315      'loz'      => q|$\lozenge$|,                # lozenge
 316
 317      # Miscellaneous Symbols
 318      'spades'   => q|$\spadesuit$|,              # black spade suit
 319      'clubs'    => q|$\clubsuit$|,               # black club suit = shamrock
 320      'hearts'   => q|$\heartsuit$|,              # black heart suit = valentine
 321      'diams'    => q|$\diamondsuit$|,            # black diamond suit
 322
 323      # C0 Controls and Basic Latin
 324      'quot'     => q|"|,                         # quotation mark = APL quote ["]
 325      'amp'      => q|\&|,                        # ampersand
 326      'lt'       => q|<|,                         # less-than sign
 327      'gt'       => q|>|,                         # greater-than sign
 328      'OElig'    => q|\OE{}|,                     # latin capital ligature OE
 329      'oelig'    => q|\oe{}|,                     # latin small ligature oe
 330      'Scaron'   => q|\v{S}|,                     # latin capital letter S with caron
 331      'scaron'   => q|\v{s}|,                     # latin small letter s with caron
 332      'Yuml'     => q|\"Y|,                       # latin capital letter Y with diaeresis
 333      'circ'     => q|\textasciicircum{}|,        # modifier letter circumflex accent
 334      'tilde'    => q|\textasciitilde{}|,         # small tilde
 335      'ensp'     => q|\phantom{n}|,               # en space
 336      'emsp'     => q|\hspace{1em}|,              # em space
 337      'thinsp'   => q|\,|,                        # thin space
 338      'zwnj'     => q|{}|,                        # zero width non-joiner
 339 #    'zwj'      => q||,                          # zero width joiner
 340 #    'lrm'      => q||,                          # left-to-right mark
 341 #    'rlm'      => q||,                          # right-to-left mark
 342      'ndash'    => q|--|,                        # en dash
 343      'mdash'    => q|---|,                       # em dash
 344      'lsquo'    => q|\textquoteleft{}|,          # left single quotation mark
 345      'rsquo'    => q|\textquoteright{}|,         # right single quotation mark
 346      'sbquo'    => q|\quotesinglbase{}|,         # single low-9 quotation mark
 347      'ldquo'    => q|\textquotedblleft{}|,       # left double quotation mark
 348      'rdquo'    => q|\textquotedblright{}|,      # right double quotation mark
 349      'bdquo'    => q|\quotedblbase{}|,           # double low-9 quotation mark
 350      'dagger'   => q|\textdagger{}|,             # dagger
 351      'Dagger'   => q|\textdaggerdbl{}|,          # double dagger
 352      'permil'   => q|\textperthousand{}|,        # per mille sign
 353      'lsaquo'   => q|\guilsinglleft{}|,          # single left-pointing angle quotation mark
 354      'rsaquo'   => q|\guilsinglright{}|,         # single right-pointing angle quotation mark
 355      'euro'     => q|\texteuro{}|,               # euro sign
 356 );
 357
 358 =head1 OBJECT METHODS
 359
 360 The following methods are provided in this module. Methods inherited
 361 from C<Pod::Select> are not described in the public interface.
 362
 363 =over 4
 364
 365 =begin __PRIVATE__
 366
 367 =item C<initialize>
 368
 369 Initialise the object. This method is subclassed from C<Pod::Parser>.
 370 The base class method is invoked. This method defines the default
 371 behaviour of the object unless overridden by supplying arguments to
 372 the constructor.
 373
 374 Internal settings are defaulted as well as the public instance data.
 375 Internal hash values are accessed directly (rather than through
 376 a method) and start with an underscore.
 377
 378 This method should not be invoked by the user directly.
 379
 380 =end __PRIVATE__
 381
 382 =cut
 383
 384
 385
 386 #   - An array for nested lists
 387
 388 # Arguments have already been read by this point
 389
 390 sub initialize {
 391   my $self = shift;
 392
 393   # print Dumper($self);
 394
 395   # Internals
 396   $self->{_Lists} = [];             # For nested lists
 397   $self->{_suppress_all_para}  = 0; # For =begin blocks
 398   $self->{_dont_modify_any_para}=0; # For =begin blocks
 399   $self->{_CURRENT_HEAD1}   = '';   # Name of current HEAD1 section
 400
 401   # Options - only initialise if not already set
 402
 403   # Cause the '=head1 NAME' field to be treated specially
 404   # The contents of the NAME paragraph will be converted
 405   # to a section title. All subsequent =head1 will be converted
 406   # to =head2 and down. Will not affect =head1's prior to NAME
 407   # Assumes:  'Module - purpose' format
 408   # Also creates a purpose field
 409   # The name is used for Labeling of the subsequent subsections
 410   $self->{ReplaceNAMEwithSection} = 0
 411     unless exists $self->{ReplaceNAMEwithSection};
 412   $self->{AddPreamble}      = 1    # make full latex document
 413     unless exists $self->{AddPreamble};
 414   $self->{StartWithNewPage} = 0    # Start new page for pod section
 415     unless exists $self->{StartWithNewPage};
 416   $self->{TableOfContents}  = 0    # Add table of contents
 417     unless exists $self->{TableOfContents};  # only relevent if AddPreamble=1
 418    $self->{AddPostamble}     = 1          # Add closing latex code at end
 419     unless exists $self->{AddPostamble}; #  effectively end{document} and index
 420   $self->{MakeIndex}        = 1         # Add index (only relevant AddPostamble
 421     unless exists $self->{MakeIndex};   # and AddPreamble)
 422
 423   $self->{UniqueLabels}     = 1          # Use label unique for each pod
 424     unless exists $self->{UniqueLabels}; # either based on the filename
 425                                          # or supplied
 426
 427   # Control the level of =head1. default is \section
 428   #
 429   $self->{Head1Level}     = 1   # Offset in latex sections
 430     unless exists $self->{Head1Level}; # 0 is chapter, 2 is subsection
 431
 432   # Control at which level numbering of sections is turned off
 433   # ie subsection becomes subsection*
 434   # The numbering is relative to the latex sectioning commands
 435   # and is independent of Pod heading level
 436   # default is to number \section but not \subsection
 437   $self->{LevelNoNum} = 2
 438     unless exists $self->{LevelNoNum};
 439
 440   # Label to be used as prefix to all internal section names
 441   # If not defined will attempt to derive it from the filename
 442   # This can not happen when running parse_from_filehandle though
 443   # hence the ability to set the label externally
 444   # The label could then be Pod::Parser_DESCRIPTION or somesuch
 445
 446   $self->{Label}            = undef # label to be used as prefix
 447     unless exists $self->{Label};   # to all internal section names
 448
 449   # These allow the caller to add arbritrary latex code to
 450   # start and end of document. AddPreamble and AddPostamble are ignored
 451   # if these are set.
 452   # Also MakeIndex and TableOfContents are also ignored.
 453   $self->{UserPreamble}     = undef # User supplied start (AddPreamble =1)
 454     unless exists $self->{Label};
 455   $self->{UserPostamble}    = undef # Use supplied end    (AddPostamble=1)
 456     unless exists $self->{Label};
 457
 458   # Run base initialize
 459   $self->SUPER::initialize;
 460
 461 }
 462
 463 =back
 464
 465 =head2 Data Accessors
 466
 467 The following methods are provided for accessing instance data. These
 468 methods should be used for accessing configuration parameters rather
 469 than assuming the object is a hash.
 470
 471 Default values can be supplied by using these names as keys to a hash
 472 of arguments when using the C<new()> constructor.
 473
 474 =over 4
 475
 476 =item B<AddPreamble>
 477
 478 Logical to control whether a C<latex> preamble is to be written.
 479 If true, a valid C<latex> preamble is written before the pod data is written.
 480 This is similar to:
 481
 482   \documentclass{article}
 483   \usepackage[T1]{fontenc}
 484   \usepackage{textcomp}
 485   \begin{document}
 486
 487 but will be more complicated if table of contents and indexing are required.
 488 Can be used to set or retrieve the current value.
 489
 490   $add = $parser->AddPreamble();
 491   $parser->AddPreamble(1);
 492
 493 If used in conjunction with C<AddPostamble> a full latex document will
 494 be written that could be immediately processed by C<latex>.
 495
 496 For some pod escapes it may be necessary to include the amsmath
  497 package. This is not yet added to the preamble automatically.
 498
 499 =cut
 500
 501 sub AddPreamble {
 502    my $self = shift;
 503    if (@_) {
 504      $self->{AddPreamble} = shift;
 505    }
 506    return $self->{AddPreamble};
 507 }
 508
 509 =item B<AddPostamble>
 510
 511 Logical to control whether a standard C<latex> ending is written to the output
 512 file after the document has been processed.
 513 In its simplest form this is simply:
 514
 515   \end{document}
 516
  517 but can be more complicated if an index is required.
 518 Can be used to set or retrieve the current value.
 519
 520   $add = $parser->AddPostamble();
 521   $parser->AddPostamble(1);
 522
  523 If used in conjunction with C<AddPreamble> a full latex document will
 524 be written that could be immediately processed by C<latex>.
 525
 526 =cut
 527
 528 sub AddPostamble {
 529    my $self = shift;
 530    if (@_) {
 531      $self->{AddPostamble} = shift;
 532    }
 533    return $self->{AddPostamble};
 534 }
 535
 536 =item B<Head1Level>
 537
 538 The C<latex> sectioning level that should be used to correspond to
 539 a pod C<=head1> directive. This can be used, for example, to turn
 540 a C<=head1> into a C<latex> C<subsection>. This should hold a number
 541 corresponding to the required position in an array containing the
 542 following elements:
 543
 544  [0] chapter
 545  [1] section
 546  [2] subsection
 547  [3] subsubsection
 548  [4] paragraph
 549  [5] subparagraph
 550
 551 Can be used to set or retrieve the current value:
 552
 553   $parser->Head1Level(2);
 554   $sect = $parser->Head1Level;
 555
 556 Setting this number too high can result in sections that may not be reproducible
 557 in the expected way. For example, setting this to 4 would imply that C<=head3>
 558 do not have a corresponding C<latex> section (C<=head1> would correspond to
 559 a C<paragraph>).
 560
 561 A check is made to ensure that the supplied value is an integer in the
 562 range 0 to 5.
 563
 564 Default is for a value of 1 (i.e. a C<section>).
 565
 566 =cut
 567
 568 sub Head1Level {
 569    my $self = shift;
 570    if (@_) {
 571      my $arg = shift;
 572      if ($arg =~ /^\d$/ && $arg <= $#LatexSections) {
 573        $self->{Head1Level} = $arg;
 574      } else {
 575        carp "Head1Level supplied ($arg) must be integer in range 0 to ".$#LatexSections . "- Ignoring\n";
 576      }
 577    }
 578    return $self->{Head1Level};
 579 }
 580
 581 =item B<Label>
 582
 583 This is the label that is prefixed to all C<latex> label and index
 584 entries to make them unique. In general, pods have similarly titled
 585 sections (NAME, DESCRIPTION etc) and a C<latex> label will be multiply
 586 defined if more than one pod document is to be included in a single
 587 C<latex> file. To overcome this, this label is prefixed to a label
 588 whenever a label is required (joined with an underscore) or to an
 589 index entry (joined by an exclamation mark which is the normal index
 590 separator). For example, C<\label{text}> becomes C<\label{Label_text}>.
 591
 592 Can be used to set or retrieve the current value:
 593
 594   $label = $parser->Label;
 595   $parser->Label($label);
 596
 597 This label is only used if C<UniqueLabels> is true.
 598 Its value is set automatically from the C<NAME> field
 599 if C<ReplaceNAMEwithSection> is true. If this is not the case
 600 it must be set manually before starting the parse.
 601
 602 Default value is C<undef>.
 603
 604 =cut
 605
 606 sub Label {
 607    my $self = shift;
 608    if (@_) {
 609      $self->{Label} = shift;
 610    }
 611    return $self->{Label};
 612 }
 613
 614 =item B<LevelNoNum>
 615
 616 Control the point at which C<latex> section numbering is turned off.
 617 For example, this can be used to make sure that C<latex> sections
 618 are numbered but subsections are not.
 619
 620 Can be used to set or retrieve the current value:
 621
 622   $lev = $parser->LevelNoNum;
 623   $parser->LevelNoNum(2);
 624
 625 The argument must be an integer between 0 and 5 and is the same as the
 626 number described in C<Head1Level> method description. The number has
 627 nothing to do with the pod heading number, only the C<latex> sectioning.
 628
 629 Default is 2. (i.e. C<latex> subsections are written as C<subsection*>
 630 but sections are numbered).
 631
 632 =cut
 633
 634 sub LevelNoNum {
 635    my $self = shift;
 636    if (@_) {
 637      $self->{LevelNoNum} = shift;
 638    }
 639    return $self->{LevelNoNum};
 640 }
 641
 642 =item B<MakeIndex>
 643
 644 Controls whether C<latex> commands for creating an index are to be inserted
 645 into the preamble and postamble
 646
 647   $makeindex = $parser->MakeIndex;
 648   $parser->MakeIndex(0);
 649
 650 Irrelevant if both C<AddPreamble> and C<AddPostamble> are false (or equivalently,
 651 C<UserPreamble> and C<UserPostamble> are set).
 652
 653 Default is for an index to be created.
 654
 655 =cut
 656
 657 sub MakeIndex {
 658    my $self = shift;
 659    if (@_) {
 660      $self->{MakeIndex} = shift;
 661    }
 662    return $self->{MakeIndex};
 663 }
 664
 665 =item B<ReplaceNAMEwithSection>
 666
 667 This controls whether the C<NAME> section in the pod is to be translated
 668 literally or converted to a slightly modified output where the section
 669 name is the pod name rather than "NAME".
 670
 671 If true, the pod segment
 672
 673   =head1 NAME
 674
 675   pod::name - purpose
 676
 677   =head1 SYNOPSIS
 678
 679 is converted to the C<latex>
 680
 681   \section{pod::name\label{pod_name}\index{pod::name}}
 682
 683   Purpose
 684
 685   \subsection*{SYNOPSIS\label{pod_name_SYNOPSIS}%
 686                \index{pod::name!SYNOPSIS}}
 687
 688 (dependent on the value of C<Head1Level> and C<LevelNoNum>). Note that
 689 subsequent C<head1> directives translate to subsections rather than
 690 sections and that the labels and index now include the pod name (dependent
 691 on the value of C<UniqueLabels>).
 692
 693 The C<Label> is set from the pod name regardless of any current value
 694 of C<Label>.
 695
 696   $mod = $parser->ReplaceNAMEwithSection;
 697   $parser->ReplaceNAMEwithSection(0);
 698
 699 Default is to translate the pod literally.
 700
 701 =cut
 702
 703 sub ReplaceNAMEwithSection {
 704    my $self = shift;
 705    if (@_) {
 706      $self->{ReplaceNAMEwithSection} = shift;
 707    }
 708    return $self->{ReplaceNAMEwithSection};
 709 }
 710
 711 =item B<StartWithNewPage>
 712
 713 If true, each pod translation will begin with a C<latex>
 714 C<\clearpage>.
 715
 716   $parser->StartWithNewPage(1);
 717   $newpage = $parser->StartWithNewPage;
 718
 719 Default is false.
 720
 721 =cut
 722
 723 sub StartWithNewPage {
 724    my $self = shift;
 725    if (@_) {
 726      $self->{StartWithNewPage} = shift;
 727    }
 728    return $self->{StartWithNewPage};
 729 }
 730
 731 =item B<TableOfContents>
 732
 733 If true, a table of contents will be created.
 734 Irrelevant if C<AddPreamble> is false or C<UserPreamble>
 735 is set.
 736
 737   $toc = $parser->TableOfContents;
 738   $parser->TableOfContents(1);
 739
 740 Default is false.
 741
 742 =cut
 743
 744 sub TableOfContents {
 745    my $self = shift;
 746    if (@_) {
 747      $self->{TableOfContents} = shift;
 748    }
 749    return $self->{TableOfContents};
 750 }
 751
 752 =item B<UniqueLabels>
 753
 754 If true, the translator will attempt to make sure that
 755 each C<latex> label or index entry will be uniquely identified
 756 by prefixing the contents of C<Label>. This allows
 757 multiple documents to be combined without clashing
 758 common labels such as C<DESCRIPTION> and C<SYNOPSIS>
 759
 760   $parser->UniqueLabels(1);
 761   $unq = $parser->UniqueLabels;
 762
 763 Default is true.
 764
 765 =cut
 766
 767 sub UniqueLabels {
 768    my $self = shift;
 769    if (@_) {
 770      $self->{UniqueLabels} = shift;
 771    }
 772    return $self->{UniqueLabels};
 773 }
 774
 775 =item B<UserPreamble>
 776
 777 User supplied C<latex> preamble. Added before the pod translation
 778 data.
 779
If set, and C<AddPreamble> is true, the contents will be written to the
output file before the translated data in place of the standard preamble.
C<MakeIndex> and C<TableOfContents> will then be ignored.
 783
 784 =cut
 785
 786 sub UserPreamble {
 787    my $self = shift;
 788    if (@_) {
 789      $self->{UserPreamble} = shift;
 790    }
 791    return $self->{UserPreamble};
 792 }
 793
 794 =item B<UserPostamble>
 795
 796 User supplied C<latex> postamble. Added after the pod translation
 797 data.
 798
If set, and C<AddPostamble> is true, the contents will be appended to the
output file after the translated data in place of the standard postamble.
C<MakeIndex> will then be ignored.
 802
 803 =cut
 804
 805 sub UserPostamble {
 806    my $self = shift;
 807    if (@_) {
 808      $self->{UserPostamble} = shift;
 809    }
 810    return $self->{UserPostamble};
 811 }
 812
 813 =begin __PRIVATE__
 814
=item B<lists>
 816
Contains details of the currently active lists.
The array contains C<Pod::List> objects. A new C<Pod::List>
 819 object is created each time a list is encountered and it is
 820 pushed onto this stack. When the list context ends, it
 821 is popped from the stack. The array will be empty if no
 822 lists are active.
 823
Returns the array of list information in list context and a reference
to that array in scalar context.
 826
 827 =cut
 828
 829
 830
 831 sub lists {
 832   my $self = shift;
 833   return @{ $self->{_Lists} } if wantarray();
 834   return $self->{_Lists};
 835 }
 836
 837 =end __PRIVATE__
 838
 839 =back
 840
 841 =begin __PRIVATE__
 842
 843 =head2 Subclassed methods
 844
 845 The following methods override methods provided in the C<Pod::Select>
 846 base class. See C<Pod::Parser> and C<Pod::Select> for more information
 847 on what these methods require.
 848
 849 =over 4
 850
 851 =cut
 852
 853 ######### END ACCESSORS ###################
 854
 855 # Opening pod
 856
 857 =item B<begin_pod>
 858
 859 Writes the C<latex> preamble if requested. Only writes something
 860 if AddPreamble is true. Writes a standard header unless a UserPreamble
 861 is defined.
 862
 863 =cut
 864
 865 sub begin_pod {
 866   my $self = shift;
 867
 868   # Get the pod identification
 869   # This should really come from the '=head1 NAME' paragraph
 870
 871   my $infile = $self->input_file;
 872   my $class = ref($self);
 873   my $date = gmtime(time);
 874
 875   # Comment message to say where this came from
 876   my $comment = << "__TEX_COMMENT__";
 877 %%  Latex generated from POD in document $infile
 878 %%  Using the perl module $class
 879 %%  Converted on $date
 880 __TEX_COMMENT__
 881
 882   # Write the preamble
 883   # If the caller has supplied one then we just use that
 884
 885   my $preamble = '';
 886
 887   if ($self->AddPreamble) {
 888
 889     if (defined $self->UserPreamble) {
 890
 891       $preamble = $self->UserPreamble;
 892
 893       # Add the description of where this came from
 894       $preamble .=  "\n$comment\n%%  Preamble supplied by user.\n\n";
 895
 896     } else {
 897
 898       # Write our own preamble
 899
 900       # Code to initialise index making
 901       # Use an array so that we can prepend comment if required
 902       my @makeidx = (
 903                      '\usepackage{makeidx}',
 904                      '\makeindex',
 905                     );
 906
 907       unless ($self->MakeIndex) {
 908         foreach (@makeidx) {
 909           $_ = '%% ' . $_;
 910         }
 911       }
 912       my $makeindex = join("\n",@makeidx) . "\n";
 913
 914       # Table of contents
 915       my $tableofcontents = '\tableofcontents';
 916
 917       $tableofcontents = '%% ' . $tableofcontents
 918         unless $self->TableOfContents;
 919
 920       # Roll our own
 921       $preamble = << "__TEX_HEADER__";
 922 \\documentclass{article}
 923 \\usepackage[T1]{fontenc}
 924 \\usepackage{textcomp}
 925
 926 $comment
 927
 928 $makeindex
 929
 930 \\begin{document}
 931
 932 $tableofcontents
 933
 934 __TEX_HEADER__
 935
 936     }
 937   }
 938
 939   # Write the header (blank if none)
 940   $self->_output($preamble);
 941
 942   # Start on new page if requested
 943   $self->_output("\\clearpage\n") if $self->StartWithNewPage;
 944
 945 }
 946
 947
 948 =item B<end_pod>
 949
 950 Write the closing C<latex> code. Only writes something if AddPostamble
is true. Writes a standard postamble unless a UserPostamble is defined.
 952
 953 =cut
 954
 955 sub end_pod {
 956   my $self = shift;
 957
 958   # End string
 959   my $end = '';
 960
 961   # Use the user version of the postamble if defined
 962   if ($self->AddPostamble) {
 963
 964     if (defined $self->UserPostamble) {
 965       $end = $self->UserPostamble;
 966
 967     } else {
 968
 969       # Check for index
 970       my $makeindex = '\printindex';
 971
 972       $makeindex = '%% '. $makeindex  unless $self->MakeIndex;
 973
 974       $end = "$makeindex\n\n\\end{document}\n";
 975     }
 976   }
 977
 978   $self->_output($end);
 979
 980 }
 981
 982 =item B<command>
 983
 984 Process basic pod commands.
 985
 986 =cut
 987
 988 sub command {
 989   my $self = shift;
 990   my ($command, $paragraph, $line_num, $parobj) = @_;
 991
 992   # return if we dont care
 993   return if $command eq 'pod';
 994
 995   # Store a copy of the raw text in case we are in a =for
 996   # block and need to preserve the existing latex
 997   my $rawpara = $paragraph;
 998
 999   # Do the latex escapes
1000   $paragraph = $self->_replace_special_chars($paragraph);
1001
1002   # Interpolate pod sequences in paragraph
1003   $paragraph = $self->interpolate($paragraph, $line_num);
1004   $paragraph =~ s/\s+$//;
1005
1006   # Replace characters that can only be done after
1007   # interpolation of interior sequences
1008   $paragraph = $self->_replace_special_chars_late($paragraph);
1009
1010   # Now run the command
1011   if ($command eq 'over') {
1012
1013     $self->begin_list($paragraph, $line_num);
1014
1015   } elsif ($command eq 'item') {
1016
1017     $self->add_item($paragraph, $line_num);
1018
1019   } elsif ($command eq 'back') {
1020
1021     $self->end_list($line_num);
1022
1023   } elsif ($command eq 'head1') {
1024
1025     # Store the name of the section
1026     $self->{_CURRENT_HEAD1} = $paragraph;
1027
1028     # Print it
1029     $self->head(1, $paragraph, $parobj);
1030
1031   } elsif ($command eq 'head2') {
1032
1033     $self->head(2, $paragraph, $parobj);
1034
1035   } elsif ($command eq 'head3') {
1036
1037     $self->head(3, $paragraph, $parobj);
1038
1039   } elsif ($command eq 'head4') {
1040
1041     $self->head(4, $paragraph, $parobj);
1042
1043   } elsif ($command eq 'head5') {
1044
1045     $self->head(5, $paragraph, $parobj);
1046
1047   } elsif ($command eq 'head6') {
1048
1049     $self->head(6, $paragraph, $parobj);
1050
1051   } elsif ($command eq 'begin') {
1052
1053     # pass through if latex
1054     if ($paragraph =~ /^latex/i) {
1055       # Make sure that subsequent paragraphs are not modfied before printing
1056       $self->{_dont_modify_any_para} = 1;
1057
1058     } else {
1059       # Suppress all subsequent paragraphs unless
1060       # it is explcitly intended for latex
1061       $self->{_suppress_all_para} = 1;
1062     }
1063
1064   } elsif ($command eq 'for') {
1065
1066     # =for latex
1067     #   some latex
1068
1069     # With =for we will get the text for the full paragraph
1070     # as well as the format name.
1071     # We do not get an additional paragraph later on. The next
1072     # paragraph is not governed by the =for
1073
1074     # The first line contains the format and the rest is the
1075     # raw code.
1076     my ($format, $chunk) = split(/\n/, $rawpara, 2);
1077
1078     # If we have got some latex code print it out immediately
1079     # unmodified. Else do nothing.
1080     if ($format =~ /^latex/i) {
1081       # Make sure that next paragraph is not modfied before printing
1082       $self->_output( $chunk );
1083
1084     }
1085
1086   } elsif ($command eq 'end') {
1087
1088     # Reset suppression
1089     $self->{_suppress_all_para} = 0;
1090     $self->{_dont_modify_any_para} = 0;
1091
1092   } elsif ($command eq 'pod') {
1093
1094     # Do nothing
1095
1096   } else {
1097     carp "Command $command not recognised at line $line_num\n";
1098   }
1099
1100 }
1101
1102 =item B<verbatim>
1103
1104 Verbatim text
1105
1106 =cut
1107
# Handle a verbatim paragraph: expand tabs to spaces and wrap the text
# in a latex verbatim environment. Inside a =begin latex block the
# paragraph is printed untouched instead.
sub verbatim {
  my ($self, $paragraph, $line_num, $parobj) = @_;

  # Raw latex mode: just print as is
  return $self->_output($paragraph) if $self->{_dont_modify_any_para};

  # A paragraph made only of whitespace produces no output
  return if $paragraph =~ /^\s+$/;

  # Clean trailing space
  $paragraph =~ s/\s+$//;

  # Expand tabs to 8-column tab stops, one line at a time. Routine taken
  # from Tabs.pm by David Muir Sharnoff muir@idiom.com, slightly
  # modified by hsmyers@sdragons.com 10/22/01
  my @lines = split("\n", $paragraph);
  for my $line (@lines) {
    1 while $line =~ s/(^|\n)([^\t\n]*)(\t+)/
      $1. $2 . (" " x
                (8 * length($3)
                 - (length($2) % 8)))
        /sex;
  }
  $paragraph = join("\n", @lines);

  $self->_output("\\begin{verbatim}\n$paragraph\n\\end{verbatim}\n");
}
1143
1144 =item B<textblock>
1145
1146 Plain text paragraph.
1147
1148 =cut
1149
# Handle a plain text paragraph.
#
# Inside a =begin latex block the paragraph is printed untouched.
# Otherwise it is escaped, its interior sequences are expanded and the
# result is printed surrounded by blank lines.
#
# When ReplaceNAMEwithSection is true, the first paragraph after
# '=head1 NAME' is treated as "name - purpose": the name becomes the
# section heading and the new Label, Head1Level is raised by one, and
# only the capitalised purpose is printed as text.
sub textblock {
  my $self = shift;
  my ($paragraph, $line_num, $parobj) = @_;

  # Expand paragraph unless in =begin block
  if ($self->{_dont_modify_any_para}) {
    # Just print as is
    $self->_output($paragraph);

    return;
  }

  # Escape latex special characters
  $paragraph = $self->_replace_special_chars($paragraph);

  # Interpolate interior sequences
  my $expansion = $self->interpolate($paragraph, $line_num);
  $expansion =~ s/\s+$//;

  # Escape special characters that can not be done earlier
  $expansion = $self->_replace_special_chars_late($expansion);

  # No =head1 may have been seen yet, so treat a missing title as empty
  my $current_head1 = $self->{_CURRENT_HEAD1};
  $current_head1 = '' unless defined $current_head1;

  # Only the first paragraph following 'head1 NAME' is rewritten:
  # _CURRENT_HEAD1 is changed below so later paragraphs do not match
  if ($current_head1 =~ /^NAME/i && $self->ReplaceNAMEwithSection()) {

    # Strip leading white space from the text that is actually split
    # (trailing white space was already removed above)
    $expansion =~ s/^\s+//;

    # Split the string into 2 parts; either may be missing
    my ($name, $purpose) = split(/\s+-\s+/, $expansion, 2);
    $name    = '' unless defined $name;
    $purpose = '' unless defined $purpose;

    # Now prevent this from triggering until a new head1 NAME is set
    $self->{_CURRENT_HEAD1} = '_NAME';

    # Print the heading
    $self->head(1, $name, $parobj);

    # Set the labeling in case we want unique names later. The second
    # argument stops any existing Label from being prefixed.
    $self->Label( $self->_create_label( $name, 1 ) );

    # Raise the Head1Level by one so that subsequent =head1 appear
    # as subsections of the main name section unless we are already
    # at maximum
    $self->Head1Level( $self->Head1Level() + 1)
      unless $self->Head1Level == $#LatexSections;

    # Now write out the new latex paragraph
    $purpose = ucfirst($purpose);
    $self->_output("\n\n$purpose\n\n");

  } else {
    # Just write the output
    $self->_output("\n\n$expansion\n\n");
  }

}
1216
1217 =item B<interior_sequence>
1218
1219 Interior sequence expansion
1220
1221 =cut
1222
# Expand one interior sequence (B<>, I<>, L<> ...) to latex.
#
#   $seq_command  - the sequence letter
#   $seq_argument - the already expanded text inside the angle brackets
#   $pod_seq      - sequence object, used to report file and line
#
# Returns the latex text. Returns nothing, after issuing a warning, for
# an unknown E<> escape, an L<> that cannot be parsed, or an unknown
# sequence letter.
sub interior_sequence {
  my $self = shift;

  my ($seq_command, $seq_argument, $pod_seq) = @_;

  if ($seq_command eq 'B') {
    return "\\textbf{$seq_argument}";

  } elsif ($seq_command eq 'I') {
    return "\\textit{$seq_argument}";

  } elsif ($seq_command eq 'E') {

    # If it is simply a number
    if ($seq_argument =~ /^\d+$/) {
      return chr($seq_argument);
    # Look up escape in hash table
    } elsif (exists $HTML_Escapes{$seq_argument}) {
      return $HTML_Escapes{$seq_argument};

    } else {
      my ($file, $line) = $pod_seq->file_line();
      warn "Escape sequence $seq_argument not recognised at line $line of file $file\n";
      return;
    }

  } elsif ($seq_command eq 'Z') {

    # Zero width space
    return '{}';

  } elsif ($seq_command eq 'C') {
    return "\\texttt{$seq_argument}";

  } elsif ($seq_command eq 'F') {
    return "\\emph{$seq_argument}";

  } elsif ($seq_command eq 'S') {
    # non breakable spaces
    my $nbsp = '~';

    $seq_argument =~ s/\s/$nbsp/g;
    return $seq_argument;

  } elsif ($seq_command eq 'L') {
    my $link = Pod::Hyperlink->new($seq_argument);

    # undef on failure
    unless (defined $link) {
      carp $@;
      return;
    }

    # Handle internal links differently
    my $type = $link->type;
    my $page = $link->page;

    if ($type eq 'section' && $page eq '') {
      # Link within this document: use an internal latex reference
      # to the label generated for the target section
      my $node = $self->_create_label($link->node);

      return "\\S\\ref{$node}";

    } else {
      # Use default markup for external references
      # (although Starlink would use \xlabel)
      my ($file, $line) = $pod_seq->file_line();

      return $self->interpolate($link->markup, $line);
    }

  } elsif ($seq_command eq 'P') {
    # Special markup for Pod::Hyperlink
    return "\\emph{$seq_argument}";

  } elsif ($seq_command eq 'Q') {
    # Special markup for Pod::Hyperlink
    return "\\textsf{$seq_argument}";

  } elsif ($seq_command eq 'X') {
    # Index entries

    # use \index command
    # I will let '!' go through for now
    # not sure how sub categories are handled in X<>
    my $index = $self->_create_index($seq_argument);
    return "\\index{$index}\n";

  } else {
    carp "Unknown sequence $seq_command<$seq_argument>";

    # Return nothing explicitly; otherwise the true value returned by
    # carp would become the expansion of the sequence
    return;
  }

}
1328
1329 =back
1330
1331 =head2 List Methods
1332
1333 Methods used to handle lists.
1334
1335 =over 4
1336
1337 =item B<begin_list>
1338
1339 Called when a new list is found (via the C<over> directive).
1340 Creates a new C<Pod::List> object and stores it on the
1341 list stack.
1342
1343   $parser->begin_list($indent, $line_num);
1344
1345 =cut
1346
# Start a new list (=over): push a fresh Pod::List object recording the
# indent, the starting line and the input file onto the list stack.
sub begin_list {
  my ($self, $indent, $line_num) = @_;

  # Nothing is printed here. The list type is only known once the first
  # =item has been seen, so add_item() writes the opening latex.
  push @{ $self->lists }, Pod::List->new(
    -indent => $indent,
    -start  => $line_num,
    -file   => $self->input_file,
  );

}
1361
1362 =item B<end_list>
1363
1364 Called when the end of a list is found (the C<back> directive).
1365 Pops the C<Pod::List> object off the stack of lists and writes
1366 the C<latex> code required to close a list.
1367
1368   $parser->end_list($line_num);
1369
1370 =cut
1371
# Finish the innermost list (=back): write the closing latex for it and
# remove it from the list stack. Warns and does nothing if no list is
# active.
sub end_list {
  my ($self, $line_num) = @_;

  my $current = $self->lists->[-1];

  if (!defined $current) {
    my $file = $self->input_file;
    warn "No list is active at line $line_num (file=$file). Missing =over?\n";
    return;
  }

  # The type is only set once an item has been added. A list that never
  # received one (eg because of a =begin/=end combination) was never
  # opened in the latex, so there is nothing to close.
  my $type = $current->type;
  if (defined $type && length($type) > 0) {
    $self->_output("\\end{$type}\n");
  }

  # Clear list
  pop @{ $self->lists };
}
1395
1396 =item B<add_item>
1397
1398 Add items to the list. The first time an item is encountered
1399 (determined from the state of the current C<Pod::List> object)
1400 the type of list is determined (ordered, unnumbered or description)
1401 and the relevant latex code issued.
1402
1403   $parser->add_item($paragraph, $line_num);
1404
1405 =cut
1406
# Handle an =item paragraph. The first item of a list decides the list
# type (itemize, enumerate or description) and writes the opening latex;
# every item then writes its own \item line and is recorded in the
# current Pod::List object.
sub add_item {
  my ($self, $paragraph, $line_num) = @_;

  my $list = $self->lists->[-1];

  # An =item outside any list: warn and print the text on its own
  if (!defined $list) {
    my $file = $self->input_file;
    warn "List has already ended by line $line_num of file $file. Missing =over?\n";
    $self->_output("$paragraph\n\n");
    return;
  }

  # If paragraph printing is turned off via =begin/=end or whatever
  # simply return immediately
  return if $self->{_suppress_all_para};

  # No items stored yet means this item starts the list
  if (scalar($list->item) == 0) {

    # The list type is determined by how the first item begins
    $paragraph =~ s/\s+$//;
    $paragraph =~ s/^\s+//;

    my $new_type = substr($paragraph, 0, 1) eq '*' ? 'itemize'
                 : $paragraph =~ /^\d/             ? 'enumerate'
                 :                                   'description';
    $list->type($new_type);

    $self->_output("\\begin{$new_type}\n");
  }

  my $type = $list->type;

  if ($type eq 'description') {
    # Long item names do not wrap, so anything longer than 40
    # characters is split into a real item header and some bold text
    my $maxlen = 40;
    my ($hunk1, $hunk2) = $self->_split_delimited( $paragraph, $maxlen );

    # The first hunk is the item header; it is followed either by the
    # second hunk or by an empty box so that the item still gets a line
    $self->_output("\n\\item[{$hunk1}] ");
    $self->_output($hunk2 ? "\\textbf{$hunk2}" : "\\mbox{}");

  } else {
    # Drop the leading '*' or number (1, 1. and 1) are all accepted)
    # but keep any text that follows it
    my $extra_info = $paragraph;
    $extra_info =~ s/^(\*|\d+[\.\)]?)\s*//;
    $self->_output("\n\\item $extra_info");
  }

  # Store the item in the list object; this is how the next call
  # knows that the list has already been started
  $list->item($paragraph);
}
1480
1481 =back
1482
1483 =head2 Methods for headings
1484
1485 =over 4
1486
1487 =item B<head>
1488
1489 Print a heading of the required level.
1490
1491   $parser->head($level, $paragraph, $parobj);
1492
1493 The first argument is the pod heading level. The second argument
1494 is the contents of the heading. The 3rd argument is a Pod::Paragraph
1495 object so that the line number can be extracted.
1496
1497 =cut
1498
# Print a section heading.
#
#   $num       - pod heading level (1 for =head1, 2 for =head2, ...)
#   $paragraph - heading text, already converted to latex
#   $parobj    - paragraph object, used to report the line number
#
# The latex sectioning command is taken from @LatexSections at position
# Head1Level - 1 + $num. A position beyond the end of that array is
# reported with a warning and the deepest available level is used.
# Levels at or beyond LevelNoNum get the unnumbered (starred) form.
sub head {
  my $self = shift;
  my ($num, $paragraph, $parobj) = @_;

  # No =head1 may have been seen yet, so treat a missing title as empty
  my $current_head1 = $self->{_CURRENT_HEAD1};
  $current_head1 = '' unless defined $current_head1;

  # If 'head1 NAME' is being replaced with a section, nothing is printed
  # for the NAME heading itself
  return
    if ($current_head1 =~ /^NAME/i && $self->ReplaceNAMEwithSection());

  # Create a label
  my $label = $self->_create_label($paragraph);

  # Create an index entry
  my $index = $self->_create_index($paragraph);

  # Work out position in @LatexSections taking into account
  # that =head1 is equivalent to $self->Head1Level
  my $level = $self->Head1Level() - 1 + $num;

  # Warn if heading too large. The computed level must be tested, not
  # the pod level: a raised Head1Level can overflow well before =head6
  if ($level > $#LatexSections) {
    # List context yields the file and the line separately
    my (undef, $line) = $parobj->file_line;
    my $file = $self->input_file;
    warn "Heading level too large ($level) for LaTeX at line $line of file $file\n";
    $level = $#LatexSections;
  }

  # Check to see whether section should be unnumbered
  my $star = ($level >= $self->LevelNoNum ? '*' : '');

  # Section
  $self->_output(
    sprintf "\\%s%s{%s\\label{%s}\\index{%s}}\n",
      $LatexSections[$level], $star, $paragraph, $label, $index
  );

}
1536
1537
1538 =back
1539
1540 =end __PRIVATE__
1541
1542 =begin __PRIVATE__
1543
1544 =head2 Internal methods
1545
1546 Internal routines are described in this section. They do not form part of the
1547 public interface. All private methods start with an underscore.
1548
1549 =over 4
1550
1551 =item B<_output>
1552
Output text to the output filehandle. This method must always be called
to output parsed text.
1555
1556    $parser->_output($text);
1557
1558 Does not write anything if a =begin is active that should be
1559 ignored.
1560
1561 =cut
1562
# Print text to the parser's output handle, unless output is currently
# suppressed by the _suppress_all_para flag.
sub _output {
  my ($self, $text) = @_;

  unless ($self->{_suppress_all_para}) {
    print { $self->output_handle } $text;
  }

}
1571
1572
1573 =item B<_replace_special_chars>
1574
1575 Subroutine to replace characters that are special in C<latex>
1576 with the escaped forms
1577
1578   $escaped = $parser->_replace_special_chars($paragraph);
1579
1580 Need to call this routine before interior_sequences are munged but not
1581 if verbatim. It must be called before interpolation of interior
1582 sequences so that curly brackets and special latex characters inserted
1583 during interpolation are not themselves escaped. This means that < and
1584 > can not be modified here since the text still contains interior
1585 sequences.
1586
1587 Special characters and the C<latex> equivalents are:
1588
1589   }     \}
1590   {     \{
1591   _     \_
1592   $     \$
1593   %     \%
1594   &     \&
1595   \     $\backslash$
1596   ^     \^{}
  ~     \texttt{\~{}}
1598   #     \#
1599
1600 =cut
1601
# Escape the characters that are special to latex. Intended to be used
# before interior sequences are expanded, so < and > are left alone.
# Returns the escaped copy of the text.
sub _replace_special_chars {
  my ($self, $text) = @_;

  # The order of the substitutions matters
  for ($text) {
    # Backslashes first, as every later step adds backslashes of its
    # own. Only the command name is inserted here; its surrounding
    # dollars are added last or they would be escaped below.
    s/\\/\\backslash/g;

    # Characters that simply take a backslash prefix
    s/([\$\#&%_{}])/\\$1/g;

    # Replace ^ characters with \^{} so that $^F works okay
    s/(\^)/\\$1\{\}/g;

    # Replace tilde (~) with \texttt{\~{}}
    s/~/\\texttt\{\\~\{\}\}/g;

    # Now add the dollars around each \backslash
    s/(\\backslash)/\$$1\$/g;
  }

  return $text;
}
1626
1627 =item B<_replace_special_chars_late>
1628
1629 Replace special characters that can not be replaced before interior
1630 sequence interpolation. See C<_replace_special_chars> for a routine
1631 to replace special characters prior to interpolation of interior
1632 sequences.
1633
1634 Does the following transformation:
1635
1636   <   $<$
1637   >   $>$
1638   |   $|$
1639
1640
1641 =cut
1642
# Wrap <, > and | in dollars (latex math mode). This has to wait until
# interior sequences have been expanded, because before that the angle
# brackets are still part of the pod markup. Returns the modified copy.
sub _replace_special_chars_late {
  my ($self, $text) = @_;

  for ($text) {
    # < and >
    s/(<|>)/\$$1\$/g;

    # Replace | with $|$ (single-quote delimiters: no interpolation)
    s'\|'$|$'g;
  }

  return $text;
}
1656
1657
1658 =item B<_create_label>
1659
1660 Return a string that can be used as an internal reference
1661 in a C<latex> document (i.e. accepted by the C<\label> command)
1662
1663  $label = $parser->_create_label($string)
1664
1665 If UniqueLabels is true returns a label prefixed by Label()
1666 This can be suppressed with an optional second argument.
1667
1668  $label = $parser->_create_label($string, $suppress);
1669
1670 If a second argument is supplied (of any value including undef)
1671 the Label() is never prefixed. This means that this routine can
1672 be called to create a Label() without prefixing a previous setting.
1673
1674 =cut
1675
# Turn text into a string usable as a latex \label. Latex commands are
# removed and every character other than a word character, '-' or ':'
# becomes an underscore (runs of underscores are collapsed). The result
# is prefixed with Label() and an underscore when UniqueLabels is true
# and a Label is defined, unless a second argument of any value
# (including undef) is passed.
sub _create_label {
  my ($self, $text, @suppress) = @_;

  # Remove latex commands
  my $label = $self->_clean_latex_commands($text);

  # Replace everything except word characters, '-' and ':'
  $label =~ s/[^-:\w]/_/g;

  # Multiple underscores look unsightly so remove repeats
  $label =~ s/_+/_/g;

  # Several pods may share one latex document, so the prefix is what
  # keeps their labels apart
  return $label
    unless !@suppress && $self->UniqueLabels() && defined $self->Label;

  return $self->Label() . '_' . $label;
}
1702
1703
1704 =item B<_create_index>
1705
1706 Similar to C<_create_label> except an index entry is created.
1707 If C<UniqueLabels> is true, the index entry is prefixed by
1708 the current C<Label> and an exclamation mark.
1709
1710   $ind = $parser->_create_index($paragraph);
1711
1712 An exclamation mark is used by C<makeindex> to generate
1713 sub-entries in an index.
1714
1715 =cut
1716
sub _create_index {
  my ($self, $paragraph, @extra) = @_;

  # As with _create_label, supplying any second argument (even undef)
  # disables the Label() prefix.
  my $suppress = @extra ? 1 : 0;

  # Strip \command{...} wrappers before building the entry
  my $entry = $self->_clean_latex_commands($paragraph);

  # Several pods may share one document; when unique labels are on and
  # a Label is set, file the entry under that Label. The '!' separator
  # is what makeindex uses to introduce a sub-entry.
  unless ($suppress) {
    if ($self->UniqueLabels() && defined $self->Label) {
      $entry = $self->Label() . '!' . $entry;
    }
  }

  # Underscores are shown as spaces in the index. This is applied
  # after prefixing, so underscores in the Label are converted too.
  $entry =~ s/_/ /g;

  return $entry;
}
1738
1739 =item B<_clean_latex_commands>
1740
1741 Removes latex commands from text. The latex command is assumed to be of the
1742 form C<\command{ text }>. "C<text>" is retained.
1743
1744   $clean = $parser->_clean_latex_commands($text);
1745
1746 =cut
1747
# Strip latex commands of the form \command{text} from a string,
# leaving "text" in place of each command.
#
#   $clean = $parser->_clean_latex_commands($text);
#
# Only the text argument is used; the object is not consulted.
# Returns the cleaned copy of the text.
sub _clean_latex_commands {
  my $self = shift;
  my $paragraph = shift;

  # Replace \text{ } with the contents of the { }.
  # The match is non-greedy so that
  #   "\text{a} and \text2{b}"
  # becomes "a and b" rather than "a} and \text2{b".
  #
  # Both braces are escaped: an unescaped literal "{" in a pattern is
  # deprecated in newer perls (a warning, and in some releases and
  # contexts a fatal error). The escaped form matches exactly the
  # same text as before.
  #
  # Known limitations (unchanged): a "\}" inside the braces ends the
  # match early, and in nested commands only the outer command is
  # removed since the substitution does not rescan its own output.
  # Callers that go on to replace non-alphanumerics tidy up whatever
  # is left.
  $paragraph =~ s/\\\w+\{(.*?)\}/$1/g;

  return $paragraph;
}
1765
1766 =item B<_split_delimited>
1767
1768 Split the supplied string into two parts at approximately the
1769 specified word boundary. Special care is made to make sure that it
1770 does not split in the middle of some curly brackets.
1771
1772 e.g. "this text is \textbf{very bold}" would not be split into
1773 "this text is \textbf{very" and " bold".
1774
1775   ($hunk1, $hunk2) = $self->_split_delimited( $para, $length);
1776
1777 The length indicates the maximum length of hunk1.
1778
1779 =cut
1780
1781 # initially Supplied by hsmyers@sdragons.com
1782 # 10/25/01, utility to split \hbox
1783 # busting lines. Reformatted by TimJ to match module style.
# Split a string into two hunks at a word boundary near $limit,
# never splitting inside a {...} group.
#
#   ($hunk1, $hunk2) = $self->_split_delimited( $para, $length );
#
# Input shorter than $limit is returned untouched as ($input, '').
# Otherwise newlines are treated as spaces, the text is cut into
# space-terminated tokens (a {...} group stays inside its token),
# and tokens are appended to hunk1 until its length reaches $limit;
# every remaining token goes to hunk2. The limit is therefore a
# threshold, not a hard cap: hunk1 can exceed it by the tail of the
# token that crossed it. Trailing whitespace is removed from both.
#
# Unbalanced braces do not lose text: a stray "}" is ignored for
# nesting purposes, and text after an unclosed "{" is kept as one
# final unsplittable token.
sub _split_delimited {
  my $self = shift;
  my $input = shift;
  my $limit = shift;

  # Return immediately if already small
  return ($input, '') if length($input) < $limit;

  my @output;
  my $s = '';
  my $t = '';
  my $depth = 0;
  my $token = '';

  # Newlines are just word separators here. The extra trailing space
  # guarantees that the last word is terminated and gets pushed.
  $input =~ s/\n/ /g;
  $input .= ' ';
  foreach my $char ( split ( //, $input ) ) {
    $token .= $char;
    if ($char eq '{') {
      $depth++;
    } elsif ($char eq '}') {
      # Never go below zero: a stray closing brace would otherwise
      # leave $depth negative and stop all further splitting.
      $depth-- if $depth > 0;
    } elsif ($char eq ' ' and $depth == 0) {
      # Skip tokens that are only the separator (repeated spaces)
      push @output, $token if $token ne ' ';
      $token = '';
    }
  }

  # Anything still pending means a "{" was never closed. Keep it as
  # one final token instead of silently dropping it.
  push @output, $token if length $token;

  # Fill the first hunk until it reaches the limit; the rest goes
  # into the second hunk.
  foreach my $word (@output) {
    if (length($s) < $limit) {
      $s .= $word;
    } else {
      $t .= $word;
    }
  }

  # Tidy up
  $s =~ s/\s+$//;
  $t =~ s/\s+$//;
  return ($s,$t);
}
1825
1826 =back
1827
1828 =end __PRIVATE__
1829
1830 =head1 NOTES
1831
1832 Compatible with C<latex2e> only. Cannot be used with C<latex> v2.09
1833 or earlier.
1834
1835 A subclass of C<Pod::Select> so that specific pod sections can be
1836 converted to C<latex> by using the C<select> method.
1837
1838 Some HTML escapes are missing and many have not been tested.
1839
1840 =head1 SEE ALSO
1841
1842 L<Pod::Parser>, L<Pod::Select>, L<pod2latex>
1843
1844 =head1 AUTHORS
1845
1846 Tim Jenness E<lt>tjenness@cpan.orgE<gt>
1847
1848 Bug fixes and improvements have been received from: Simon Cozens
1849 E<lt>simon@cozens.netE<gt>, Mark A. Hershberger
1850 E<lt>mah@everybody.orgE<gt>, Marcel Grunauer
1851 E<lt>marcel@codewerk.comE<gt>, Hugh S Myers
1852 E<lt>hsmyers@sdragons.comE<gt>, Peter J Acklam
1853 E<lt>jacklam@math.uio.noE<gt>, Sudhi Herle E<lt>sudhi@herle.netE<gt>,
1854 Ariel Scolnicov E<lt>ariels@compugen.co.ilE<gt>,
1855 Adriano Rodrigues Ferreira E<lt>ferreira@triang.com.brE<gt> and
1856 R. de Vries E<lt>r.de.vries@dutchspace.nlE<gt>.
1857
1858
1859 =head1 COPYRIGHT
1860
1861 Copyright (C) 2000-2004 Tim Jenness. All Rights Reserved.
1862
1863 This program is free software; you can redistribute it and/or modify
1864 it under the same terms as Perl itself.
1865
1866 =begin __PRIVATE__
1867
1868 =head1 REVISION
1869
1870 $Id: LaTeX.pm,v 1.19 2004/12/30 01:40:44 timj Exp $
1871
1872 =end __PRIVATE__
1873
1874 =cut
1875
1876 1;