#!/usr/bin/perl

# Kalvis M. Jansons

# This script is called ``txt2tex'', and converts well-formatted plain
# text into LaTeX.

# On a UNIX system replace the top line with something like: #!/usr/bin/perl
# but with your system's path to perl!

# Copyright (C) 1998 --- 2008  Kalvis M. Jansons

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.


#: # Txt2tex auto find perl code.
#     eval 'exec perl -S $0 "$@"'
#     if 0;

# Version control lines
$version = '4.0';

#h
#h TXT2TeX Copyright (C) 1998 --- 2008 Kalvis M. Jansons
#h =====================================================
#h
#h
#h This program is free software: you can redistribute it and/or modify
#h it under the terms of the GNU General Public License as published by
#h the Free Software Foundation, either version 3 of the License, or
#h (at your option) any later version.
#h
#h This program is distributed in the hope that it will be useful,
#h but WITHOUT ANY WARRANTY; without even the implied warranty of
#h MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#h GNU General Public License for more details.
#h
#h You should have received a copy of the GNU General Public License
#h along with this program. If not, see <http://www.gnu.org/licenses/>.
#h
#h This perl script (which is part of the KalTeX package) converts plain text
#h into something with a little LaTeX formatting.  If you are reading a LaTeXed
#h version of this ``readme'' file, it was made from the comments in the code
#h of txt2tex using txt2tex to format them; if you are reading the plain text
#h version, try running it through txt2tex (you can use ``txt2tex --demo'' for
#h this on a unix system).
#h
#h Written by Kalvis M. Jansons (email address k@kalvis.com), but based on
#h txt2html by Seth Golub (email address seth@aigeek.com).  So if you like it,
#h send an email to both of us, but thank Seth the most; if you have any
#h problems or suggestions send an email to me (Kalvis).
#h
#h By default, much of LaTeX's fine structure is disabled by definitions in the
#h .tex file header.  If you need to edit the LaTeX you may need to remove
#h or change some of these statements; or you may need to rerun txt2tex in a
#h lower escaping mode, to add more complex structures, like tables and
#h complex equations.  I did it this way as I will use txt2tex myself  mainly
#h for non-mathematical documents, and for those, I like to be able to type %
#h for percent etc., and paste in emails without worrying too much about all
#h the strange symbols. Set the ``-ec'' flag if you want to ``escape'' all
#h of LaTeX's special functions, and kill the ``\'', which is often the
#h safest setting for ``unknown'' document formats.
#h
#h
#h DO YOU WANT A DEMONSTRATION? IF SO, SEE BELOW.
#h
#h  * For a trivial demo of txt2tex, type ``txt2tex --info |txt2tex -ec''.
#h    o For a nicer copy of this readme file, try
#h      ``txt2tex --info |txt2tex -ec -ns -10pt''.
#h    o Or maybe you will like the look of this better:
#h      ``txt2tex --info |txt2tex -tf -ec -ns -10pt''.
#h      - Remember, to see the nice output, type something like:
#h        ``txt2tex --info |txt2tex -tf -ec -r off > readme.tex''
#h        followed by ``latex readme.tex; xdvi readme.dvi''.
#h    o On a unix or linux system try ``txt2tex --demo''.
#h  * The best test is clearly to try it on one of your own plain text files.
#h


########################
# Some initializations
#

# $mac = 1 if $^O =~ /MacOS/;  # Are we running under MacOS
#
# if ($mac)
# {
#    my($cmdLine, @args);
#    $cmdLine = &MacPerl::Ask("Enter command line options:");
#    require "shellwords.pl";
#    @args = &shellwords($cmdLine);
#    unshift(@ARGV, @args);
#    open(OUTPUT, ">t2t_output.tex");
#    select OUTPUT;
# }

# List of ruleset dictionary files; -rs/--ruleset pushes further names onto it.
# NOTE(review): assigning 0 to an array yields the one-element list (0), not
# an empty list -- confirm that the code reading this list expects that entry.
@ruleset_dictionaries = 0;
# Presumably a count of the heading styles seen so far; it is not read in this
# part of the file -- TODO confirm against its users.
$num_heading_styles = 0;

# LaTeX tags used for successive heading levels.
# The first field just marks this as the default headings, so I can check
# if they have been changed.
# (-HT <tags> replaces the list and puts the marker "new" in front instead of
# "orig"; "-HT shift" drops the first element of the list.)

my @heading_tag = ("orig","section","subsection","subsubsection","paragraph",
		   "subparagraph");

#
#########################


#########################
# Configurable options
#
#h
#h Paper size
#h ~~~~~~~~~~
#h
#h The paper size is set to ``a4paper'', but if you would like a different
#h paper size I suggest finding the line with ``a4paper'' in txt2tex and
#h changing it once and for all. This can also be changed using the
#h ``--doctype'' option.
#h
#h Tag syntax
#h ~~~~~~~~~~
#h
#h In the options in the next section, the term ``tag'' is often used.  I
#h have used this term for many types of LaTeX mark-up instruction.  The
#h syntax for using tags with txt2tex is easy.  For a simple tag, which
#h puts a heading into a LaTeX subsection form, the tag is just ``subsection''.
#h For more complex, or nested, tags the syntax is a little more complex.  If,
#h for example, you wanted all section headings to be centered, the tag to do
#h it with would be ``section{\center''.  You could also add a ``clearpage''
#h so each section is on a new page, and a ``*'' so the sections are not
#h numbered; the tag would then be ``clearpage\section*{\center''.  Also
#h remember when using tags on a command line, you must take account of the
#h normal shell escaping conventions.
#h
#h Some important command line options
#h ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#h
#h Note that any command line option name can contain any number of ``_'' to
#h make the command line more readable, and, in fact, you only need a single
#h ``-'' for any of the names listed with ``--''.
#h
#h [(-dt|--doctype) <doctype>]
$doctype = "\\documentclass[a4paper,12pt]{article}";
#  Do not use ``null'' here, but rather "".
#h
#h Used to set the LaTeX documentclass or documentstyle.  It can be set to
#h ``null'' for no doctype, which is useful if you want to add some LaTeX
#h definitions above the definitions in the txt2tex header.  For an example,
#h see the definition of ``--switch slides'' at the end of txt2tex.
#h
#h [-10pt|-11pt|-12pt]
#h
#h Used to set the LaTeX font size.  The default is 12pt.  The ``pt'' can
#h be dropped.
#h
#h [(-up|--usepackage) <name>|off]
$usepackages = "";
#h
#h Sets a LaTeX ``usepackage'' definition.  No default packages loaded.
#h
#h [(-lh|--latexhooks) <name|mode>]
$latexhook = "\\jobname";
$latexhookmode = 0;
#h
#h Used to add LaTeX instructions from files.  Given a ``name'', it tells LaTeX
#h to read (if they exist) the files name-HeadB, name-HeadE, name-BodyB,
#h name-BodyE (with or without a suffix .tex); these files are read in to the
#h beginning and end of the HEAD and the beginning and end of the BODY.
#h Given a number, it sets the ``latex-hook'' mode, which controls which LaTeX
#h input statements are added; these are 1,2,4,8 for the above files, which
#h are bitwise ORed.  If a new LaTeX-hook name is given, the mode is set to 15,
#h i.e. all bits set.  If a mode is given, and no name has been set, the
#h default name ``\jobname'' is used as the name.  Hooks are off by default.
#h
#h Remember in LaTeX the basename of the LaTeX file is stored in the LaTeX
#h variable ``\jobname'', so by using this as the base part of your LaTeX
#h hooks, you would not have to change the LaTeX itself if you wanted to
#h use a different set of hook files, as you would need only to change the
#h name of the main LaTeX file.
#h
#h [(-ec|--escapechars) [<mode>]]
#h
# Escape-mode bit flags; the values are ORed together to form $escapemode.
$ESCBSLASH  = 1 << 0;                  #    1: backslash
$ESCDOLLARS = 1 << 1;                  #    2: dollar sign
$ESCSCRIPTS = 1 << 2;                  #    4: ^ and _
$ESCANGLES  = 1 << 3;                  #    8: < and >
$ESCAND     = 1 << 4;                  #   16: ampersand
$ESCVERT    = 1 << 5;                  #   32: vertical bar
$ESCNUM     = 1 << 6;                  #   64: number sign
$ESCTILDA   = 1 << 7;                  #  128: tilde
# Bit 8 (256) has no named flag in this list.
$ESCPERCENT = 1 << 9;                  #  512: percent sign
$ESCDQUOTE  = 1 << 10;                 # 1024: double quote
$ESCALL     = (1 << 11) - 1;           # 2047: every bit set
$ESCDEFAULT = $ESCALL ^ $ESCBSLASH;    # 2046: everything except backslash
$escapemode = $ESCDEFAULT;
#h
#h Used to set the escape mode. The options (which can be bitwise ORed) are:
#h
#h         1 --- escape \
#h         2 --- escape $
#h         4 --- escape ^ and _
#h         8 --- escape < and >
#h        16 --- escape &
#h        32 --- escape |
#h        64 --- escape #
#h       128 --- escape ~
#h       512 --- escape %
#h      1024 --- escape "
#h
#h (The above list shows what txt2tex does with complex formatting in the
#h plain text document, namely puts it in a LaTeX verbatim block, at least
#h in the LaTeX version of the documentation.)
#h The default mode is 2046, so the LaTeX backslash is still active. Using
#h ``-ec'' without a following number will escape everything, and ``-ec 0''
#h will escape nothing.  Note that mode 1 also fixes a problem with a line
#h that begins with white space and has ``['' as the first non-space
#h character.
#h
#h [-bm|--batchmode]
$batchmode = 0;
#h
#h Makes LaTeX run in its non-stopping mode, i.e. ignores any LaTeX
#h warnings about over-full boxes etc.. Off by default.
#h
#h [-nv|--noverbatim]
#h
#h Stops any output being put in verbatim blocks even if it looks like it
#h is ``preformatted''.  This sometimes gives other subroutines a chance
#h to format the data.  Off by default.
#h
#h [-sv|--splitverbatim]
$samepageverbatim = "\\samepage ";
# Set $samepageverbatim = "" if verbatim blocks can be split.
#h
#h Use this if verbatim blocks can be split by page breaks; the default is
#h that they cannot.
#h
#h [(-pb|--prebegin) <num>]
$preformat_trigger_lines = 2;
$preformat_after_blank = 0;
#h
#h Sets the number of preformatted-looking lines (2 by default) needed
#h to begin a verbatim block. The options are:
#h
#h    * 0 --- put the entire document in a verbatim block.
#h    * 1 --- one trigger line, so even a single line can be put in verbatim.
#h    * 2 --- two trigger lines.
#h    * 3 --- same as 1, but verbatim blocks can start only after a
#h      blank line.
#h
#h Less than 0 is set to 0 and more than 3 is set to 3.
#h
#h [(-pe|--preend) <num>]
$endpreformat_trigger_lines = 2;
#h
#h Sets the number of non-preformatted-looking lines (2 by default) needed
#h to end a verbatim block. The options are from 0 to 3, with less than
#h 0 set to 0 and more than 3 set to 3.  Option 3 has the special meaning
#h of ending the verbatim block on a blank line.
#h
#h NOTE for --prebegin and --preend: If only one is zero, the other is ignored.
#h If both are zero, the entire document is put in a verbatim block.
#h
#h [(-p|--preformat) <num[,num[,num]]>]
$verbatim_white_min = 6;
$verbatim_min = 6;
$verbatim_post_min = 3;
#h
#h This option sets the values of the following variables:
#h
#h    * $verbatim_white_min (6),
#h    * $verbatim_min (6),
#h    * $verbatim_post_min (3),
#h
#h where the numbers in () are the defaults.  If only one number is given,
#h it sets $verbatim_white_min and $verbatim_min to this value, otherwise it
#h sets the variables in order.  A line is considered to be  preformatted if
#h either there is a non-space character followed by $verbatim_min non-word
#h characters, or if there are at least $verbatim_white_min spaces after
#h the start of the line and the line contains a non-space character
#h followed by $verbatim_post_min non-word characters.
#h
#h Note that tabs are expanded before these tests.
#h
#h [-ns|--nosectionnumbers]
$nosectionnumbers = "";
#h
#h Do not number LaTeX sections. They may already have numbers, for example,
#h or you may feel that the document looks better without them.  In fact, all
#h this really does is add a ``*'' at the end of the headings tags, so if you
#h have changed these tags, be sure that ``-ns'' still makes sense for your
#h tags.
#h
#h [-np|--nopagenumbers]
$nopagenumbers = 0;
#h
#h Do not number LaTeX pages, i.e. set the pagestyle to empty.
#h
#h [(-lm|--listmode) <mode>]
$listmode = 0;
#h
#h Sets the list mode; the bitwise ORed options are:
#h
#h    *  0 --- automatically number and label lists, renumbering what appear
#h    to be lists with errors.  Use standard LaTeX numbering and labelling.
#h    *  1 --- keep the original numbers (or letters) on enumerated lists, but
#h    put standard labels on itemized lists.
#h    *  2 --- turn itemized lists into enumerated lists.
#h    *  4 --- hrules end all active lists.
#h    *  8 --- easy start.  Enumerated lists need not start with 1, A, etc.,
#h    which is useful for documents that have headings, diagrams etc. in
#h    lists.  You would normally use this with list mode 1, to avoid
#h    renumbering.
#h    * 16 --- turn LaTeX description environments into enumerate; this may
#h    sound a strange thing to do, but leads to nice results.  Try it!
#h    * 32 --- do not nest description environments. Normally a new 
#h    description starts for every new level of indentation, but this mode
#h    switches this feature off.
#h
#h Using ``-lm'' without a following number sets the default mode 0.
#h
#h [(-de|--description) <regexp>|off]
#h
#h Sets the regular expressions to identify lines that should be put in a LaTeX
#h ``description'' environment.  Only the ``first match'' in the regular 
#h expression will be used as the ``name'' in the ``description'', and the 
#h rest is deleted.  So, if you do not want to delete anything, put your 
#h regular expression in ``()''.  This is off by default, and the default 
#h can be reset with the command line option ``-de off''.  See the definitions 
#h of ``-sw remind'' and ``-sw dict'' for examples.
#h
#h [(-s|--shortline) <[-]num>]
$short_line_length = 40;
$ignore_leading_spaces = 1;
#h
#h Sets the upper bound of the length of a ``short line'' (40 by default),
#h which is assumed to be intentionally this short, so must be kept broken.
#h If the number given is negative, leading spaces are not ignored when
#h determining if a line is ``short''. The default is that leading spaces
#h are ignored.
#h
#h [(-ss|--shortlineskip) <length>]
$shortlineskip = "";  # Use "" rather than ``null'' here.
#h
#h Sets the vertical skip after a ``short line'', for example try ``-ss 1ex''.
#h The default is a normal line break. The default can be restored by setting
#h it to ``null''.
#h
#h [(-r|--hrule) <num>|off]
$hrule_min = 4;			
$hrules_on = 1;
#h
#h If given a number, sets the minimum number of ``==='' etc. for a horizontal
#h rule.  The default is 4.  If given ``off'', sets $hrules_on = 0, and any
#h hrules found are not printed.
#h
#h [(-sm|--smallmargins) [<mode>]]
$smallmargins = 0;
#h
#h LaTeX defaults to large margins, but I like small (1in) margins. The
#h bitwise ORed options are:
#h
#h   * 0 --- standard LaTeX margins.
#h   * 1 --- 1in X margins.
#h   * 2 --- 1in Y margins.
#h   * 3 --- 1in X and Y margins.
#h
#h The default is 0.  If ``-sm'' is not followed by a valid number, then
#h option 3 is set.
#h
#h [(-t|--title) <title>]
$title = 0;	
#h
#h You can specify a title to be placed at the top of the document.
#h
#h [(-tt|--titletag) <tag>]
$titletag = "centerline\{\\LARGE\\bf";
#h
#h Used to set the title tag.  The default tag is ``centerline{\LARGE\bf''.
#h
#h [-tf/+tf] | [--titlefirst/--notitlefirst]
$titlefirst = 0;
#h	
#h Use the first non-blank line as the title of the document.  Off by default.
#h
#h [(-pi|--parindent) <num>]
$par_indent = 3;
#h
#h Sets the minimum number of spaces indented in the first line of a paragraph.
#h This is used only when there's no blank line preceding the paragraph.
#h The default is 3.
#h
#h [(-c|--caps) <num>]
$min_caps_length = 3;
#h
#h Sets the minimum sequential CAPS for a ``caps line'', which is then put
#h in a special font.  For the full definition of a caps line, see the code.
#h The default is 3.
#h
#h [(-ct|--capstag) <tag>|off]
$caps_tag = "subsubsection\*";
#  Use "" rather than ``off'' here.
#h
#h Sets the tag to put around ``caps lines''.  Set it to ``off'' for no
#h caps lines, but note that some of these lines could then be marked as solo
#h lines; if you want to avoid this, set it to ``null'', which is turned into
#h the empty tag.  The default tag is ``subsubsection*''.
#h
#h [(-st|--solotag) <tag>|off]
$solo_tag = "subsubsection\*\{\\textit";
#  Use "" rather than ``off'' here.
#h
#h Sets the tag for ``solo lines'', i.e. lines that have a blank line before
#h and after, and have the ``right'' important-looking ending (see
#h ``sub solo'' for the full definition).  The default tag for solo lines is
#h ``subsubsection*{\textit''.  Set it to ``off'' for no solo lines.
#h
#h [(-m|--mail) [<mode>]]
# Mail-mode bit flags; the values are ORed together to form $mailmode.
($MAILHQ, $MAILCUT, $MAILPAGE, $MAILBODY) = (1, 2, 4, 8);
$MAILDEFAULT = 1;
$mailmode    = $MAILDEFAULT;
#h
#h Used to set the mail mode.  The bitwise ORed options are:
#h
#h    * 1 --- deal with mail headers and mail quoted text.
#h    * 2 --- add half-line width right-flushed hrules at the beginning of
#h    new messages. Strange, but easy to see!
#h    * 4 --- add a LaTeX ``clearpage'' before each new message.
#h    * 8 --- do not print the mail header.
#h
#h ``-m'' without a following number sets the default mail mode of 1. (Any
#h non-zero option also includes option 1.)
#h
#h [-u/+u] | [--unhyphen/--nounhyphen]
$unhyphenation = 1;
#h
#h Enables unhyphenation of the raw text, so we can leave hyphenation to
#h LaTeX.  On by default.
#h
#h [(-ul|--ulength) <num>]
$underline_length_tolerance = 1;
#h
#h Sets the underline tolerance for plain text headings, i.e. how much longer
#h or shorter than the text can underlines be and still be underlines.  The
#h default is 1.
#h
#h [(-uo|--uoffset) <num>]
$underline_offset_tolerance = 1;
#h
#h Sets the offset tolerance for underlines of plain text headings.  The
#h default is 1.
#h
#h [(-tw|--tabwidth) <num>]
$tab_width = 8;
#h
#h Sets the width of a tab.  The default is 8.

$indent_width = 3;
#  Indents this many spaces for each level of a list.
#h
#h [-e/+e] | [--extract/--noextract]
$extract = 0;
#h
#h Sets extract mode for making inserts for other LaTeX documents.  Off
#h by default.
#h
#h [(-rs|--ruleset) <file>]
#h
#h [+rs|--noruleset]
$make_ruleset = 1;		
#h
#h By default reads the ruleset in ``.txt2tex-ruleset'' (if it exists),
#h but a different file can be given.  When looking for a specified ruleset
#h file, if it fails to find a direct match, it will then try ``file-ruleset''
#h and last of all  ``~/.txt2tex-file'', where ``file'' is the given file name.
#h
#h [-ro/+ro] | [--rulesetonly/--norulesetonly]
$ruleset_only = 0;
#h
#h Do no escaping or marking up at all, except for processing the ruleset
#h dictionary file and applying it.  This is useful if you want to use
#h txt2tex's rulesetting feature on a LaTeX document.  If the LaTeX is a
#h complete document (includes HEAD and BODY) then you will need to use
#h the --extract option also.  Off by default.
#h
#h [(-H|--heading) <regexp>]
@custom_heading_regexp = ();
#h
#h Used to set regular expressions to pick out custom headings in the plain
#h text.  For examples, see the ``switch'' options at the end of txt2tex,
#h in particular ``num''. Header levels are assigned by regexp in the order
#h seen; when a line matches a custom header regexp, it is tagged as
#h a header.  If it is the first time that particular regexp has matched,
#h the next available header level is associated with it and applied to
#h the line.  Any later matches of that regexp will use the same header level.
#h Therefore, if you want to match numbered header lines, you could use
#h something like this:
#h
#h -H '^ *\d+\. \w+' -H '^ *\d+\.\d+\. \w+' -H '^ *\d+\.\d+\.\d+\. \w+'
#h
#h Then lines like:
#h
#h 2. Examples
#h 2.1. More Examples
#h 2.1.1. Even More Examples
#h
#h would be marked as section, subsection, etc., assuming they were found in
#h that order, and that no other header styles were found.  If you prefer
#h that the first heading specified always becomes ``section'', the second
#h always becomes ``subsection'' etc., then use the --explicitheadings option.
#h Also you would probably want the --nosectionnumbers option, to avoid getting
#h two sets of numbers; this could also be fixed using the --trimheadings
#h option (see the definition of ``--switch n'').
#h
#h [(-HT|--headingtags) <tag1[,tag2...]>|shift|number]
$numberheadings = 0;
#h
#h [(-TH|--trimheadings) <regexp>]
$trimheadings = "";
#h
#h The sequence of tags for the section headings can be set by something like:
#h ``-HT something,anotherthing,...'' and the headings can be trimmed using
#h ``-TH <regexp>'', i.e. whatever matches ``regexp'' is removed.  Note that
#h all headings are trimmed using the same regular expression and that the
#h regular expression is applied after the heading tag and label have been
#h added.  The argument of ``-HT'' can also be ``shift'', which shifts the
#h sequence of heading tags down by one, or ``number'', which tells txt2tex
#h (rather than LaTeX) to number the headings (off by default).  Remember not
#h to ask LaTeX to number the headings too, if you use ``number''.
#h
#h [-EH/+EH] | [--explicitheadings/--noexplicitheadings]
$explicitheadings = 0;
#h
#h This tells txt2tex not to try to find any headings except the custom ones
#h specified.  Also, the custom headings will not be assigned levels in the
#h order they are encountered in the document, but in the order they are
#h specified on the command line.  Off by default.
#h
#h [(-db|--debug) <num>]
$dict_debug = 0;
#h
#h Debug mode for ruleset dictionaries. Bitwise OR what you want to see:
#h
#h    * 1 --- the parsing of the dictionary.
#h    * 2 --- the code that will make the ruleset.
#
#  [-nA] | [--notA]
$notA = 0;
#  Lists cannot start with "A", which is good for lists with initials etc..
#  Q., A. combinations are trapped by default in this version of txt2tex.
#h
#h [(-tr|--trim) <num|regexp>]
$trim = 0;
#h
#h Used to trim ``n'' characters from the beginning of each line longer than
#h ``n'', or to trim using a regular expression.  The default is 0.
#h
#h [(-sw|--switch) <keyword>]
#h
#h Used to add sets of command line options that are kept at the bottom
#h of this file.  For example ``-sw num'' will help pick out numbered
#h section headings, and ``-sw lynx'' cleans up text files from the lynx
#h browser.  Take a look at the definition of ``-sw num'', and see if you
#h can work out what all the options do.  Then add some ``-sw'' options
#h of your own.  Also see the section on option sets below.
#h
#h [-tc|--twocolumn]
#h
#h Sets LaTeX's ``twocolumn'' option.  Off by default.  To see what this looks
#h like with 1in margins, take a look at this ``readme'' file in this format
#h by typing ``txt2tex --demo'' on a unix or linux machine.
#h
#h [-ls|--landscape]
#h
#h Sets LaTeX's ``landscape'' option.  Off by default.
#h
#h [-sp|--sloppy]
$sloppy = 0;
#h
#h Sets LaTeX's ``sloppy'' option, which is particularly useful for slides.
#h Off by default.
#h
#h [-d|--draft]
#h
#h Save the output in a file called draft.tex.  Off by default.
#h
#h [(-h|--help)/--info/--demo]
#h
#h --help gives a short help message listing the options, --info gives a
#h plain text version of the ``readme'' file, and --demo (on a standard
#h unix or linux system) will run the plain text from --info through
#h txt2tex to give a nice LaTeXed version of the ``readme'' file; note that
#h the ``demo'' makes t2t_readme.txt, .tex, .dvi, .aux, and .log.
#h
#h [-v|--version]
#h
#h Prints the txt2tex version number.

# Locations of the site-wide and the per-user ruleset dictionary files.
# NOTE(review): "after options" / "before options" presumably say when each
# file is read relative to rulesets given on the command line -- confirm.
$system_ruleset_dict  = '/usr/local/lib/txt2tex-ruleset';     # after options
$default_ruleset_dict = $ENV{'HOME'} . '/.txt2tex-ruleset';   # before options

# END OF CONFIGURABLE OPTIONS
########################################


########################################
# Definitions  (Don't change these)

# Line-type constants: zero plus distinct powers of two.
$NONE       = 0;
$LIST       = 1 << 0;     #    1
$HRULE      = 1 << 1;     #    2
$PAR        = 1 << 2;     #    4
$VERB       = 1 << 3;     #    8
$END        = 1 << 4;     #   16
$BREAK      = 1 << 5;     #   32
$HEADER     = 1 << 6;     #   64
$MAILHEADER = 1 << 7;     #  128
$MAILQUOTE  = 1 << 8;     #  256
$CAPS       = 1 << 9;     #  512
$RULESET    = 1 << 10;    # 1024
$SOLO       = 1 << 11;    # 2048

# List-type constants (presumably ordered, unordered and description lists
# -- TODO confirm against the list-handling code).
($OL, $UL, $DL) = (1, 2, 4);

# Character entity names: maps single characters (written as octal escapes)
# to the LaTeX text associated with them.

%char_entities = (
    "\007" => "",
    "\014" => "\n\n\\clearpage\n\n",
    "\243" => "\\pounds\{\}",
    "\255" => "--",
    "\267" => "\\ensuremath\{\\bullet\{\}\}",
    "\251" => "\\copyright\{\}",
    "\260" => "",                              # degree symbol
);

########################################
########################################
#
# Subroutine definitions

# Print a sample ruleset file on the currently selected output handle.
#
# Writes a one-line banner containing $version, then re-reads this script
# ($0) and prints the remainder of every line that starts with "#H" (upper
# case only).  If the script cannot be opened, the banner is still printed,
# a warning goes to STDERR and the sub returns without printing any rules.
sub sampleruleset
{
    print "This file was created by txt2tex $version\n\n";

    # Three-argument open: the script path is used literally, never parsed
    # for a mode prefix or trimmed of surrounding white space.
    open(my $script_fh, '<', $0)
	or do { warn "txt2tex: cannot read '$0': $!\n"; return; };

    while (my $line = <$script_fh>)
    {
	print "$1\n" if $line =~ /^\#H(.*)$/;
    }

    close($script_fh);
    return;
}

# Print the embedded ``readme'' text on the currently selected output handle.
#
# Re-reads this script ($0) and prints the remainder of every line that
# starts with "#h"; the match ignores case, so "#H" lines are printed too.
# If the script cannot be opened, a warning goes to STDERR and nothing is
# printed.
sub help
{
    # Three-argument open: the script path is used literally, never parsed
    # for a mode prefix or trimmed of surrounding white space.
    open(my $script_fh, '<', $0)
	or do { warn "txt2tex: cannot read '$0': $!\n"; return; };

    while (my $line = <$script_fh>)
    {
	# Note this writes to STDOUT (or the selected handle), not STDERR.
	print "$1\n" if $line =~ /^\#h(.*)$/i;
    }

    close($script_fh);
    return;
}

# Print a short usage summary on the currently selected output handle.
#
# Writes the usage banner, then re-reads this script ($0) and prints every
# option synopsis line, i.e. every line that starts with "#h [" (lower-case
# "h", exactly one space).  If the script cannot be opened, the banner is
# still printed, a warning goes to STDERR and no option lines are listed.
sub usage
{
    print "Usage\: txt2tex \[options\] \[input file\[s\]\]\n\n";

    # Three-argument open: the script path is used literally, never parsed
    # for a mode prefix or trimmed of surrounding white space.
    open(my $script_fh, '<', $0)
	or do { warn "txt2tex: cannot read '$0': $!\n"; return; };

    while (my $line = <$script_fh>)
    {
	# Note this writes to STDOUT (or the selected handle), not STDERR.
	print "$1\n" if $line =~ /^\#h( \[.*?)$/;
    }

    close($script_fh);
    return;
}

# Emit one LaTeX \InputIfFileExists statement on the selected output handle.
#
#   $hookname -- file name handed to \InputIfFileExists
#   $hookthen -- optional message, wrapped in \typeout, for the "found" branch
#   $hookelse -- optional message, wrapped in \typeout, for the "missing" branch
#
# Nothing is printed when the global $latexhook is false.
sub hook
{
    my ($hookname, $hookthen, $hookelse) = @_;
    if ($latexhook)
    {
	# An empty or false message leaves the corresponding branch empty.
	my $found_part   = $hookthen ? '\\typeout{' . $hookthen . '}' : '';
	my $missing_part = $hookelse ? '\\typeout{' . $hookelse . '}' : '';

	print '\\InputIfFileExists{' . $hookname . '}{' . $found_part
	    . '}{' . $missing_part . "}\n\n";
    }
}


sub deal_with_options
{
    while (($#ARGV > -1) && ($ARGV[0] =~ /^[-+].+/))
    {

	$ARGV[0] =~ s/_//g;
            # Option names can have "_" to make them more readable.
	$ARGV[0] =~ s/^--/-/;
            # We do not really force long names to start with "--", a "-"
            # will do.

	if (($ARGV[0] eq "-switch" || $ARGV[0] eq "-sw") &&
	    $ARGV[1])
	{
	    die "--switch can only be used once!\n" if $data;
	
	    shift @ARGV;
	    my $word = shift @ARGV;

	    while ($line = <DATA>) {
		chomp $line;
		if ($line =~ s/\\$//)
		{
		    $line .= <DATA>;
		    redo unless eof();
		}
		
		@data = split(/\s+/,$line);		
		if ($word eq $data[0])
		{
		    unshift(@ARGV, @data);
		}
		last if $line =~ /\#===/;
	    }

	    $data = 1;
	    next;
	}

	if (($ARGV[0] eq "-rs" || $ARGV[0] eq "-ruleset") && $ARGV[1])
	{
	    $make_ruleset = 1;
	    # Stick it on the end of the list
	    push(@ruleset_dictionaries, $ARGV[1]);
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "+rs" || $ARGV[0] eq "-noruleset") )
	{
            $system_ruleset_dict = "";
            $make_ruleset = 0;
            @ruleset_dictionaries = 0;
	    next;
	}

	if (($ARGV[0] eq "-H" || $ARGV[0] eq "-heading") && $ARGV[1])
	{
	    push(@custom_heading_regexp, $ARGV[1]);

	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-headingtags" || $ARGV[0] eq "-HT") && $ARGV[1])
	{
	    if($ARGV[1] eq "shift")
	    {
		shift @heading_tag;
	    }
	    elsif ($ARGV[1] eq "number")
	    {
		$numberheadings = 1;
		$nosectionnumbers = "*" if $heading_tag[0] ne "new";
	    }
	    else
	    {
		$nosectionnumbers = "";
		@heading_tag = split(/[,]/, $ARGV[1]);
		unshift(@heading_tag, "new");
	    }
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-EH" || $ARGV[0] eq "-explicitheadings") )
	{
            $explicitheadings = 1;
	    next;
	}

	if (($ARGV[0] eq "+EH" || $ARGV[0] eq "-noexplicitheadings") )
	{
            $explicitheadings = 0;
	    next;
	}

	if (($ARGV[0] eq "-latexhooks" || $ARGV[0] eq "-lh") && $ARGV[1])
	{
	    if ($ARGV[1] =~ /\d\d?/)
	    {
		$latexhookmode = $ARGV[1];
	    }
	    else
	    {
		$latexhook = $ARGV[1];
		$latexhookmode = 15;
	    }
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-r" || $ARGV[0] eq "-hrule")
	    && $ARGV[1] =~ /^(\d+|off)$/)
	{
	    if ($ARGV[1] =~ /off/)
	    {
		$hrules_on = 0;
	    }
	    else
	    {
		$hrule_min = $ARGV[1];
	    }
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-s" || $ARGV[0] eq "-shortline")
	    && ($ARGV[1] =~ /^\-?\d+$/))
	{
	    $short_line_length = $ARGV[1];
	    if ($ARGV[1] =~ /^\-/)
	    {
		$short_line_length =~ s/^\-//;
		$ignore_leading_spaces = 0;
	    }
	    else
	    {
		$ignore_leading_spaces = 1;
	    }
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-p" || $ARGV[0] eq "-preformat") &&
	    $ARGV[1] =~ /^\d+(\,\d+){0,2}$/)
	{
	    if ($ARGV[1] =~ /^\d+$/)
	    {
		$verbatim_white_min = $ARGV[1];
		$verbatim_min = $verbatim_white_min;
	    }
	    else
	    {
		$ARGV[1] .= ",$verbatim_post_min";
		($verbatim_white_min, $verbatim_min, $verbatim_post_min)
		    = split(/\,/, $ARGV[1]);
	    }
	
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-pb" || $ARGV[0] eq "-prebegin") &&
	    $ARGV[1] =~ /^-?\d+$/)
	{
	    if ($ARGV[1] > 2)
	    {
		$preformat_after_blank = 1;
		$preformat_trigger_lines = 1;
	    }
	    else
	    {
		$preformat_after_blank = 0;
		$preformat_trigger_lines = $ARGV[1];
	    }
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-pe" || $ARGV[0] eq "-preend") &&
	    $ARGV[1] =~ /^-?\d+$/)
	{
	    $endpreformat_trigger_lines = $ARGV[1];
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-e" || $ARGV[0] eq "-extract"))
	{
	    $extract = 1;
	    next;
	}

	if (($ARGV[0] eq "+e" || $ARGV[0] eq "-noextract"))
	{
	    $extract = 0;
	    next;
	}

	if (($ARGV[0] eq "-c" || $ARGV[0] eq "-caps") &&
	    $ARGV[1] =~ /^\d+$/)
	{
	    $min_caps_length = $ARGV[1];
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-ct" || $ARGV[0] eq "-capstag") && $ARGV[1])
	{
	    $caps_tag = $ARGV[1];
	    if ($caps_tag eq "off")
	    {
		$caps_tag = "";
	    }
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-solotag" || $ARGV[0] eq "-st") && $ARGV[1])
	{
	    $solo_tag = $ARGV[1];
	    if ($solo_tag eq "off")
	    {
		$solo_tag = "";
	    }
	    shift @ARGV;
	    next;
	}

	if ($ARGV[0] eq "-u" || $ARGV[0] eq "-unhyphen")
	{
	    $unhyphenation = 1;
	    next;
	}

	if ($ARGV[0] eq "+u" || $ARGV[0] eq "-nounhyphen")
	{
	    $unhyphenation = 0;
	    next;
	}

	if (($ARGV[0] eq "-titletag" || $ARGV[0] eq "-tt") && $ARGV[1])
	{
	    $titletag = $ARGV[1];

	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-t" || $ARGV[0] eq "-title") && $ARGV[1])
	{
	    $title = $ARGV[1];

	    shift @ARGV;
	    next;
	}

	if ($ARGV[0] eq "-tf" || $ARGV[0] eq "-titlefirst")
	{
	    $titlefirst = 1;
	    next;
	}

	if ($ARGV[0] eq "+tf" || $ARGV[0] eq "-notitlefirst")
	{
	    $titlefirst = 0;
	    next;
	}

	if (($ARGV[0] eq "-dt" || $ARGV[0] eq "-doctype") && $ARGV[1])
	{
	    $doctype = $ARGV[1];
	    $doctype = "" if $doctype eq "null";
	    shift @ARGV;
	    next;
	}	

	if (($ARGV[0] eq "-trimheadings" || $ARGV[0] eq "-TH") && $ARGV[1])
	{
	    $trimheadings = $ARGV[1];
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-usepackage" || $ARGV[0] eq "-up") && $ARGV[1])
	{
	    if ($ARGV[1] eq "off")
	    {
		$usepackages = "";
	    }
	    else
	    {
		$usepackages .= "\\usepackage\{" . $ARGV[1] . "\}\n";
	    }	
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-ul" || $ARGV[0] eq "-ulength")
	    && $ARGV[1] =~ /^\d+$/)
	{
	    $underline_length_tolerance = $ARGV[1];
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-uo" || $ARGV[0] eq "-uoffset") &&
	    $ARGV[1] =~ /^\d+$/)
	{
	    $underline_offset_tolerance = $ARGV[1];
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-tw" || $ARGV[0] eq "-tabwidth") &&
	    $ARGV[1] =~ /^\d+$/)
	{
	    $tab_width = $ARGV[1];
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-pi" || $ARGV[0] eq "-parindent")
	    && $ARGV[1] =~ /^\d+$/)
	{
	    $par_indent = $ARGV[1];
	    shift @ARGV;
	    next;
	}

	if ($ARGV[0] eq "-ec" || $ARGV[0] eq "-escapechars")
	{
	    if($ARGV[1] =~ /^\d+$/)
	    {
		$escapemode = $ARGV[1];
		shift @ARGV;
	    }
	    else
	    {
		$escapemode = $ESCALL;
	    }
	    next;
	}


	if ($ARGV[0] eq "-m" || $ARGV[0] eq "-mail")
	{
	    if($ARGV[1] =~ /^\d+$/)
	    {
		$mailmode = $ARGV[1];
		shift @ARGV;
	    }
	    else
	    {
		$mailmode = $MAILDEFAULT;
	    }
	    next;
	}

	if ($ARGV[0] eq "-lm" || $ARGV[0] eq "-listmode")
	{
	    if($ARGV[1] =~ /^\d+$/)
	    {
		$listmode = $ARGV[1];
		shift @ARGV;
	    }
	    else
	    {
		$listmode = 0;
	    }
	    next;
	}

	if ($ARGV[0] eq "-ro" || $ARGV[0] eq "-rulesetonly")
	{
	    $ruleset_only = 1;
	    next;
	}

	if ($ARGV[0] eq "+ro" || $ARGV[0] eq "-norulesetonly")
	{
	    $ruleset_only = 0;
	    next;
	}

	if ($ARGV[0] eq "-v" || $ARGV[0] eq "-version")
	{
	    print "\nKalvis M. Jansons's txt2tex $version\n\n";
	    exit;
	}

	if ($ARGV[0] eq "-h" || $ARGV[0] eq "-help")
	{
	    &usage;
	    exit;
	}

	if ($ARGV[0] eq "-info")
	{
	    &help;
	    exit;
	}

	if (($ARGV[0] eq "-db" || $ARGV[0] eq "-debug") && $ARGV[1] =~ /^\d+$/)
	{
	    $dict_debug = $ARGV[1];
	    shift @ARGV;
	    next;
	}

	if ($ARGV[0] eq "-sampleruleset")
	{
	    &sampleruleset;
	    exit;
	}

	if ($ARGV[0] eq "-batchmode" || $ARGV[0] eq "-bm")
	{
	    $batchmode = 1;
	    next;
	}

	if ($ARGV[0] eq "-sloppy" || $ARGV[0] eq "-sp")
	{
	    $sloppy = 1;
	    next;
	}

	if ($ARGV[0] eq "-draft" || $ARGV[0] eq "-d")
	{
	    open(DRAFT, ">draft.tex");
	    select DRAFT;
	    next;
	}

	if ($ARGV[0] eq "-smallmargins" || $ARGV[0] eq "-sm")
	{
	    if ($ARGV[1] =~ /^[0-3]$/)
	    {
		$smallmargins = $ARGV[1];
		shift @ARGV;
	    }
	    else
	    {
		$smallmargins = 3;
	    }
	    next;
	}

	if ($ARGV[0] eq "-nopagenumbers" || $ARGV[0] eq "-np")
	{
	    $nopagenumbers = 1;
	    next;
	}

	if ($ARGV[0] eq "-nosectionnumbers" || $ARGV[0] eq "-ns")
	{
	    $nosectionnumbers = "*";
	    next;
	}

	if ($ARGV[0] =~ /^-1[0-2](pt)?$/)
	{
	    $ARGV[0] =~ s/^-//;
	    $ARGV[0] .= "pt" if $ARGV[0] =~ /^1\d$/;
	    $doctype =~ s/(class|style)/$1\[\]/ unless $doctype =~ /\[/;
	    $doctype =~ s/\d+pt/$ARGV[0]/ || $doctype =~ s/\]/\,$ARGV[0]\]/;
	    $doctype =~ s/\[\,/\[/;
	    next;
	}

	if ($ARGV[0] =~ /^-(landscape|ls|twocolumn|tc)$/)
	{
	    my $op = $ARGV[0];
	    $op =~ s/^-//;
	    if ($op =~ /tc/)
	    {
		$op = "twocolumn";
	    }
	    elsif ($op =~ /ls/)
	    {
		$op = "landscape";
	    }
	    $doctype =~ s/(class|style)/$1\[\]/ unless $doctype =~ /\[/;
	    $doctype =~ s/\]/,$op\]/ unless $doctype =~ /$op/;
	    $doctype =~ s/\[\,/\[/;
	    next;
	}

	if (($ARGV[0] eq "-description" || $ARGV[0] eq "-de") && $ARGV[1])
	{
	    if ($ARGV[1] ne "off")
	    {
		$description = $ARGV[1];
	    }
	    else
	    {
		$description = 0;
	    }
	    
	    shift @ARGV;
	    next;
	}

	if (($ARGV[0] eq "-trim" || $ARGV[0] eq "-tr") && $ARGV[1])
	{
	    if ($trim)
	    {
		if ($trim =~ /^\d+$/)
		{
		    $trim = $ARGV[1];
		}
		else
		{
		    $trim = "(" . $trim . "|" . $ARGV[1] . ")";
		}
	    }
	    else
	    {
		$trim = $ARGV[1];
	    }

	    shift @ARGV;
	    next;
	}

	if ($ARGV[0] eq "-noverbatim" || $ARGV[0] eq "-nv")
	{
            $preformat_trigger_lines = 2;
	    $endpreformat_trigger_lines = 0;
	    next;
	}


	if ($ARGV[0] eq "-splitverbatim" || $ARGV[0] eq "-sv")
	{
	    $samepageverbatim = "";
	    next;
	}

	if ($ARGV[0] eq "-notA" || $ARGV[0] eq "-nA")
	{
	    $notA = 1;
	    next;
	}

	if ($ARGV[0] eq "-demo")
	{
	    open(DEMO, "$0 --info|tee t2t_readme.txt|\
            $0 -sw demo > t2t_readme.tex;\
            pdflatex t2t_readme.tex; xpdf t2t_readme.pdf || open t2t_readme.pdf|");
	    while(<DEMO>)
	    {print;}
	    exit;
	}

        if (($ARGV[0] eq "-shortlineskip" || $ARGV[0] eq "-ss")
	    && $ARGV[1] =~ /^(\d|null)/)
        {
	    if ($ARGV[1] eq "null")
	    {
		$shortlineskip = "";
	    }
	    else
	    {
		$shortlineskip = "\[$ARGV[1]\]";
	    }
            shift @ARGV;
            next;
        }

        if ($ARGV[0] eq "--")
        {
            last;
        }

	print STDERR "Unrecognized option: $ARGV[0]\n";
	print STDERR " or bad paramater: $ARGV[1]\n" if($ARGV[1]);

	&usage;
	exit(1);

    }
    continue
    {
	shift @ARGV;
    }

    $preformat_trigger_lines = 0 if ($preformat_trigger_lines < 0);

    $endpreformat_trigger_lines = 1 if ($preformat_trigger_lines == 0);
    $endpreformat_trigger_lines = 0 if ($endpreformat_trigger_lines < 0);
    $endpreformat_trigger_lines = 3 if ($endpreformat_trigger_lines > 3);
}

# True when the string passed in is empty or holds nothing but whitespace.
sub is_blank
{
    my ($text) = @_;
    return $text =~ /^\s*$/;
}

# Make the global $line safe for LaTeX: braces are backslash-escaped,
# every remaining backslash is replaced by a \backslash symbol, and a "["
# that opens the line gets an empty group in front of it.
sub escape
{
    for ($line)
    {
        # Braces go first, so the backslash pass below can recognise the
        # freshly written "\{" and "\}" and leave them alone.
        s/\{/\\\{/g;
        s/\}/\\\}/g;

        if ($escapemode & $ESCDOLLARS)
        {
            s/\\(?!(\{|\}))/\\\(\\backslash\\\)/g;
        }
        else
        {
            # In this mode the symbol is additionally wrapped in an mbox.
            s/\\(?!(\{|\}))/\{\\mbox\{\\\(\\backslash\\\)\}\}/g;
        }

        # Shield a "[" at the start of the line (after optional whitespace).
        s/^(\s*)(\[)/$1\{\}\[/;
    }
}

# Recognise a line made up only of rule characters and replace the global
# $line with the matching LaTeX fill command.  When $hrules_on is false the
# rule line is replaced by an empty line instead.
sub hrule
{
    # Do hrules end lists?
    if ($listnum && ($listmode & 4) && ($prev_action & $HRULE))
    {
        &endlist($listnum);
        $clearlines++; # Add blank line
        # When hrules end lists, it looks better to me if the length of the
        # hrule is the same as the list-text width rather than as the
        # following text width, but you might want to change this.
    }

    # Pick the fill command; at least $hrule_min rule characters are needed.
    my $fill;
    if ($line =~ /^\s*(\+?[-_~=]\+?\s*){$hrule_min,}$/)
    {
        $fill = "\\hrulefill\n";
    }
    elsif ($line =~ /^\s*([+\*\.]\s*){$hrule_min,}$/)
    {
        $fill = "\\dotfill\n";
    }

    # Not a rule line: leave $line untouched.
    return unless $fill;

    unless ($hrules_on)
    {
        $line = "\n";
        return;
    }

    $line = $fill;
    $line_action |= $HRULE;
}

# If the previous line was short enough, force a LaTeX line break at its
# end: "\\" (plus $shortlineskip) is inserted in front of the last
# character of the global $prev, and $prev_action is flagged with $BREAK.
sub shortline
{
    return if ($mode & $LIST);
    return if &is_blank($line);
    return if &is_blank($prev);

    # Length of the previous line, optionally discounting its indent.
    return if ($prev_line_length - $prev_indent * $ignore_leading_spaces
               > $short_line_length);

    return if ($line_action & ($END | $HEADER | $HRULE | $LIST | $PAR));
    return if ($prev_action & ($HEADER | $HRULE | $BREAK | $CAPS | $SOLO));

    # Take the final character off, add the break, then put it back.
    my $last_char = chop($prev);
    $prev .= "\\\\$shortlineskip" . $last_char;
    $prev_action |= $BREAK;
}

# Mail/news formatting for the global $line: quoted lines and header lines
# get a forced line break, and a "From"/"Newsgroups:" line that follows a
# blank line starts a new header block.
sub mailstuff
{
    my $text_follows = !&is_blank($nextline);

    if ($text_follows && (($line =~ /^\w*>/) || ($line =~ /^\w*\|/)))
    {
        # Quoted text ("> ..." or "| ...").
        $line =~ s/$/\\\\/;
        $line_action |= ($BREAK | $MAILQUOTE);
        if(!($prev_action & ($BREAK | $PAR)))
        {
            $prev .= "\n";
            $line_action |= $PAR;
        }
    }
    elsif (($line =~ /^(From:? .*\@)|^(Newsgroups: )/) && &is_blank($prev))
    {
        # First line of a message header.
        &endlist($listnum) if $listnum; # trakgalvis added line
        chop $line;
        $line = $line . "\\\\\n";
        $prev .= "\\subsection*\{\\hfill\\hrulefill\}\n"
            if ($mailmode & $MAILCUT);
        $prev = "\\clearpage\n" . $prev if ($mailmode & $MAILPAGE);
        $line_action |= ($BREAK | $MAILHEADER | $PAR);
    }
    elsif (($prev_action & $MAILHEADER) && $text_follows
           && (($line =~ /^[\w\-]*:/)      # Handle "Some-Header: blah"
               || ($line =~ /^\s+\S/)))    # Handle multi-line mail headers
    {
        $line =~ s/$/\\\\/;
        $line_action |= ($BREAK | $MAILHEADER);
    }
}

# We can do a little tidying up here, but do not add too much here;
# rather make a .txt2tex-ruleset file.
#
# Works in place on the globals $prev and $line:
#   * removes a forced line break that directly follows a \begin{...};
#   * sets e-mail addresses in typewriter font;
#   * puts a control space after "i.e.", "e.g.", "etc." and "viz.";
#   * turns every "LaTeX" that is not already "\LaTeX" into "\LaTeX{}";
#   * stops a footnote inside a sectioning command reaching the contents.
sub tidy
{
    $prev =~ s/(\\begin\{.*?\}) *\\\\(\[.{5}\])?$/$1/g;
    # Put email addresses in tt font
    # NOTE(review): the final domain label is limited to 2-3 characters, so
    # addresses under longer top-level domains are not matched.
    $line =~ s/(([\w-]+(\.[\w-]+)*\%)*[\w-]+(\.[\w-]+)*\@[\w-]+(\.[\w-]+)*\.[\w-]{2,3}(?![\w-]))/\\texttt\{$1\}/g;
    # Control space after i.e. ...
    $line =~ s/(\b)(i\.e\.|e\.g\.|etc\.|viz\.)[^\S\n]+/$1$2\\ /g;
    # A look-behind is used rather than consuming the preceding character,
    # so that "LaTeX" at the very start of the line is converted too.
    $line =~ s/(?<!\\)LaTeX/\\LaTeX\{\}/g;
#   Uncomment the next line to make isolated "-"s into m-rules.
#   $line =~ s/ - / --- /g;
    $line =~ s/\{\{\}/\{/g;

    # Now let us avoid a LaTeX bug, which results in a crash if a numbered
    # section heading has a footnote in it.  This is due to the table of
    # contents entry, which we will remove to avoid the error.
    if ($line =~ /\\footnote/)
    {
        $line =~ s/(\\(sub)*(section|paragraph))(?![\*\[])/$1\[\]/;
    }
}

# Returns $vector with every mode bit that is set in $mask switched off.
sub subtract_modes
{
    my ($flags, $drop) = @_;
    # Force the bits on first so that the subtraction removes exactly them.
    my $with_all_set = $flags | $drop;
    return $with_all_set - $drop;
}

# Flag the current line as the start of a paragraph ($PAR) when it is not
# blank, carries no action other than $END, $MAILQUOTE or $BREAK, and
# either follows a blank line or is indented by at least $par_indent more
# than the previous line.
sub paragraph
{
    return if &is_blank($line);
    return if &subtract_modes($line_action, $END | $MAILQUOTE | $BREAK);

    if (&is_blank($prev) || ($line_indent >= $prev_indent + $par_indent))
    {
        $line_action |= $PAR;
    }
}

# For a blank line, hand back the second argument unchanged.  For any
# other line, return how many spaces precede its first non-space character.
sub count_indent
{
    my ($text, $fallback) = @_;
    return $fallback if &is_blank($text);

    my ($leading) = $text =~ /^( *)[^ ]/;
    return length($leading);
}

# Work out whether a line opens a list item.
#
# Returns ($prefix, $number, $rawprefix, $name):
#   $prefix    - the item marker in canonical form (used to compare items),
#   $number    - the first word of an enumerated marker, 0 for a bullet,
#   $rawprefix - the marker exactly as it was written,
#   $name      - the captured label of a description item.
# Returns (0,0,0) when the line is not a list item.
sub listprefix
{
    my ($text) = @_;
    my ($prefix, $number, $rawprefix, $name);

    if ($description && ($text =~ /^\s*$description/))
    {
        # Description item: $description supplies the capture for the name.
        ($prefix, $name) = $text =~ /^(\s*)$description/;
        $name =~ s/\s*$//;
        $name =~ s/^\s*//;
        return (0,0,0) unless $name;
        $prefix = "" if ($listmode & 32);
        $prefix .= "DES";
    }
    elsif ($text =~ /^\s*[\(\[]?(\d+|[^\W\d_]|[ivx]+|[IVX]+)[\.\)\]:]\s+\S/)
    {
        # Enumerated item.
        ($number) = $text =~ /(\w+)/;
        ($rawprefix) = $text =~ /^(\s*\W?\w+\W)/;
        $prefix = $rawprefix;

        # Put prefix in canonical form: numbers --> 1, [a-z]+ --> a
        # and [A-Z]+ --> A.  Only the first rule that applies is used.
        $prefix =~ s/\d+/1/
            or $prefix =~ s/[a-z]+/a/
            or $prefix =~ s/[A-Z]+/A/;
    }
    elsif ($text =~ /^\s*[-+=\*o\267]{1,2}\s+\S/)
    {
        # Bulleted item.
        $number = 0;
        ($rawprefix) = $text =~ /^(\s*\S{1,2}\s)/;
        $prefix = $rawprefix;
    }
    else
    {
        return (0,0,0);
    }

    return ($prefix, $number, $rawprefix, $name);
}

# Open a new list one level below the current one.
#
# Takes the four values produced by listprefix.  Records the prefix and the
# list type for level $listnum, appends the "\begin{...}" to the global
# $prev, bumps $listnum and switches list mode on.  Returns 1 when a list
# was opened and 0 when the marker was rejected as a list start.
sub startlist
{
    my ($prefix, $number, $rawprefix, $name) = @_;

    $listprefix[$listnum] = $prefix;

    my $environment = "enumerate\}\n";

    if ($number)
    {
        # If it doesn't start with 1,a,A,i,I leave it alone, and be careful
        # if the list begins with an A and we have seen a Q, as in FAQs.

        $notA-- if (($notA ne "1") && ($number eq "Q"));

        my $acceptable_start = ($listmode & 8)
            || ($number eq "1")
            || ($number eq "a")
            || ($number eq "A" && !$notA)
            || ($number eq "i")
            || ($number eq "I");

        unless ($acceptable_start)
        {
            $notA++ if ($number eq "A") & ($notA < 0);
            return 0;
        }
        $list[$listnum] = $OL;
    }
    elsif ($name)
    {
        $environment = "description\}\n" unless ($listmode & 16);
        $list[$listnum] = $DL;
    }
    else
    {
        $environment = "itemize\}\n" unless ($listmode & 2);
        $list[$listnum] = $UL;
    }

    my $opening = "$list_indent\\begin\{" . $environment;
    if ($prev =~ /\A\s*\Z/)
    {
        $prev = $opening;
        $clearlines++; # Retain 1 blank line where blanks are found
    }
    else
    {
        $prev .= $opening;
    }

    $listnum++;
    $list_indent = " " x $listnum x $indent_width;
    $line_action |= $LIST;
    $mode |= $LIST;
    return 1;
}

# End N lists: close the N innermost open lists by appending the matching
# "\end{...}" lines to the global $prev, innermost first.  Once no list is
# left open, the $LIST bit of $mode is flipped and $clearlines is reset.
sub endlist
{
    my ($remaining) = @_;

    # Avoid white space above ``end list''
    $prev = "" if ($prev =~ /\A\s*\Z/);

    while ($remaining > 0)
    {
        my $level = $listnum - 1;
        $list_indent = " " x $level x $indent_width;

        if (($list[$level] == $UL) && !($listmode & 2))
        {
            $prev .= "$list_indent\\end\{itemize\}\n";
        }
        elsif ($list[$level] == $DL && !($listmode & 16))
        {
            $prev .= "$list_indent\\end\{description\}\n";
        }
        else
        {
            $prev .= "$list_indent\\end\{enumerate\}\n";
        }

        $remaining--;
        $listnum--;
    }

    unless ($listnum)
    {
        $mode ^= $LIST;
        $clearlines = 0; # Avoid white space above ``end list''
    }
    $line_action |= $END;
}

# Rewrite the global $line as an "\item" of the innermost open list.  The
# item marker is stripped; description items get their name (global $name)
# in brackets, and numbered items keep their number (global $number) when
# bit 1 of $listmode is set.  Arguments passed by callers are not used.
sub continuelist
{
    my $kind = $list[$listnum-1];

    if ($kind == $UL)
    {
        $line =~ s/^\s*\S{1,2}\s*/$list_indent\\item  /;
    }
    elsif ($kind == $DL)
    {
        $line =~ s/^\s*$description\s*//;
        unless ($line)
        {
            # Nothing left after the label: supply a placeholder body.
            $line = "\\ " unless ($listmode & 16);
            $line .= "\n";
        }
        $line = "$list_indent\\item\[$name\]  " . $line;
    }
    elsif ($kind == $OL)
    {
        $line =~ s/^\s*\W?\w+\W\s*//;
        my $item = ($listmode & 1)
            ? "$list_indent\\item\[$number\.\]  "
            : "$list_indent\\item  ";
        $line = $item . $line;
    }

    $line_action |= $LIST;
}

# List handling for the current line (global $line).
#
# The line's marker is classified with listprefix and compared with the
# prefixes of the lists that are already open (@listprefix, innermost
# last).  Depending on the outcome the line continues the innermost list,
# closes inner lists to return to an outer one, opens a new (possibly
# nested) list, or -- when it is no list item at all -- closes every list
# if the previous line was blank.
sub liststuff
{
    my ($i);

    # Localised globals, so that continuelist (called below) can read
    # $name and $number for this line.
    local ($prefix, $number, $rawprefix, $name) = &listprefix($line);

    if (!$prefix)
    {
	return if !&is_blank($prev); # inside a list item
	# This is not a list, so end them all.
        &endlist($listnum) if $listnum;
        return;
    }

    # If numbers with more than one digit grow to the left instead of
    # to the right, the prefix will shrink and we'll fail to match the
    # right list.  We need to account for this.

    my ($prefix_alternate);

    if (length("" . $number) > 1)
    {
        $prefix_alternate = (" " x (length("" . $number) -1)) . $prefix;
    }

    # Maybe we're going back up to a previous list
    # Search from the innermost list outwards; $i ends up as the index of
    # the matching list, or -1 when no open list has this prefix.
    for ($i = $listnum - 1; ($i >= 0) && ($prefix ne $listprefix[$i]); $i--)
    {
        if (length( "" . $number ) > 1)
        {
            last if $prefix_alternate eq $listprefix[$i];
        }
    }

    my ($islist);

    # Measure the indent from where the text starts, not where the
    # prefix starts.  This won't screw anything up, and if we don't do
    # it, the next line might appear to be indented relative to this
    # line, and get tagged as a new paragraph.

    my ($total_prefix) = $line =~ /^(\s*[\w-+=\*o\267]+.\s*)/;

    # Of course, we only use it if it really turns out to be a list.

    $islist = 1;
    # From here on $i is the number of lists that stay open: 0 means no
    # open list matched, $listnum means the innermost list matched.
    $i++;
    if (($i > 0) && ($i != $listnum))
    {
	# An outer list matched: close the lists nested inside it.
	&endlist($listnum - $i);
        $islist = 0;
    }
    elsif (!$listnum || ($i != $listnum))
    {
        # No open list matched, so this may be the start of a new one.
        if (($line_indent > 0) || &is_blank($prev)
            || ($prev_action & ($BREAK | $HEADER)))
        {
            $islist = &startlist($prefix, $number, $rawprefix, $name);
        }
	else
        {
            # We have something like this: "- foo" which usually
            # turns out not to be a list.
            return;
        }
    }

    &continuelist($prefix, $number, $rawprefix, $name) if ($mode & $LIST);
    $line_indent = length($total_prefix) if $islist;
}

# Returns true if the passed string is considered to be preformatted:
# either it contains a run of at least $verbatim_min non-word characters
# between two pieces of text, or it is indented by at least
# $verbatim_white_min whitespace characters and contains such a run of at
# least $verbatim_post_min.  The thresholds are compiled into the patterns
# once only (/o).
sub is_preformatted
{
    my ($text) = @_;

    return 1 if ($text =~ /\S[\W_]{$verbatim_min,}\S+/o);

    return (($text =~ /^\s{$verbatim_white_min,}\S+/o)
            && ($text =~ /\S[\W_]{$verbatim_post_min,}\S+/o));
}

# Decide whether the verbatim block that is currently open ends before the
# current line; if so, close it by appending "\end{verbatim}" to $prev.
# $endpreformat_trigger_lines selects the test: 3 ends on a blank line,
# 1 ends on one line that is not preformatted, other values below 3 also
# require the following line not to be preformatted.
sub endpreformat
{
    my $finished;

    if ($endpreformat_trigger_lines == 3)
    {
        $finished = &is_blank($line);
    }
    elsif ($endpreformat_trigger_lines < 3)
    {
        $finished = !&is_preformatted($line)
            && ($endpreformat_trigger_lines == 1
                || !&is_preformatted($nextline));
    }

    return unless $finished;

    $prev .= "\\end\{verbatim\}\n";
    $mode ^= ($VERB & $mode);   # clears the $VERB bit if it is set
    $line_action |= $END;
}

# Decide whether a verbatim block starts at the current line; if so, put
# "\begin{verbatim}" in front of $line and switch verbatim mode on.
# $preformat_trigger_lines selects the test: 0 always starts, 1 needs the
# current line to look preformatted, other values need the next line too.
sub preformat
{
    unless ($preformat_trigger_lines == 0)
    {
        return unless &is_preformatted($line);
        return unless ($preformat_trigger_lines == 1
                       || &is_preformatted($nextline));
    }

    # Optionally insist on a blank line in front of the block.
    return unless (&is_blank($prev) || !$preformat_after_blank);

    $line =~ s/^/\\begin\{verbatim\}\n/;
    $mode |= $VERB;
    $line_action |= $VERB;
}

# Produce the next label.  With a false heading level the running counter
# $non_header_label is returned (and advanced).  Otherwise the counter for
# that level in @heading_count is advanced, all deeper counters are reset,
# and the dotted number ("2.1.3") down to that level is returned.
sub make_new_label
{
    my ($depth) = @_;

    return sprintf("%d", $non_header_label++) if(!$depth);

    $heading_count[$depth-1]++;

    # Reset lower order counters
    for my $idx ($depth .. $#heading_count)
    {
        $heading_count[$idx] = 0;
    }

    my @parts;
    for my $idx (0 .. $depth - 1)
    {
        $heading_count[$idx] ||= 1; # In case they skip any
        push(@parts, sprintf("%d", $heading_count[$idx]));
    }
    return join(".", @parts);
}

# Finish off a heading held in the global $line: close any open lists,
# substitute the heading number for the "=n=" marker when headings are
# numbered, append a "\label{sec...}" and strip whatever $trimheadings
# matches.
sub label_heading
{
    my ($depth) = @_;

    &endlist($listnum) if $listnum;

    my ($label) = &make_new_label($depth);

    if ($numberheadings)
    {
        $line =~ s/=n=/$label/;
    }
    $line =~ s/$/\\label\{sec$label\}/;

    if ($trimheadings)
    {
        $line =~ s/$trimheadings//g;
    }
}

# Map a heading style to a level.  Styles are numbered 1, 2, 3, ... in the
# order in which they are first seen; a known style keeps its number.
sub heading_level
{
    my ($style) = @_;

    unless ($heading_styles{$style})
    {
        $heading_styles{$style} = ++$num_heading_styles;
    }
    return $heading_styles{$style};
}

# Treat the current line as an underlined heading when the next line is an
# underline of about the same length and offset (within the configured
# tolerances).  The underline is consumed, the heading is tagged with the
# sectioning command for its level, labelled, and flagged with $HEADER.
sub heading
{
    my ($hoffset, $heading) = $line =~ /^(\s*)(.+)$/;
    my ($uoffset, $underline) = $nextline =~ /^(\s*)(\S+)\s*$/;

    # Anything that failed to match counts as empty.
    for ($hoffset, $heading, $uoffset, $underline)
    {
        $_ = "" unless defined($_);
    }

    my $lendiff    = abs(length($heading) - length($underline));
    my $offsetdiff = abs(length($hoffset) - length($uoffset));

    return if &is_blank($line);
    return if ($lendiff > $underline_length_tolerance);
    return if ($offsetdiff > $underline_offset_tolerance);

    # The style is the underline character, plus "C" if the heading is in
    # all caps (so that it counts as a different style).
    my $style = substr($underline,0,1);
    $style .= "C" if &iscaps($line);

    $nextline = &getline;             # Eat the underline
    $heading_level = &heading_level($style);
    $line = "=n= " . $line if $numberheadings; # Mark where the number will go
    &tagline("$heading_tag[$heading_level]$nosectionnumbers");
    &label_heading( $heading_level );
    $line_action |= $HEADER;
}

# Try the user-supplied heading patterns in order.  The first one that
# matches the current line turns it into a heading: with explicit headings
# the level is the pattern's position, otherwise each pattern is treated
# as a heading style of its own.
sub custom_heading
{
    for my $idx (0 .. $#custom_heading_regexp)
    {
        my $pattern = $custom_heading_regexp[$idx];
        next unless ($line =~ /$pattern/);

        my $level = $explicitheadings
            ? $idx + 1
            : &heading_level("Cust" . $idx);

        $line = "=n= " . $line if $numberheadings; # Mark number slot
        &tagline("$heading_tag[$level]$nosectionnumbers");
        &label_heading($level);
        $line_action |= $HEADER;
        last;
    }
}

# Join a word that was hyphenated across a line break: the first word of
# $nextline (with trailing punctuation and whitespace) is moved to the end
# of $line in place of the hyphen.  If that empties $nextline, a fresh
# line is read.
sub unhyphenate
{
    # Cut the word off the next line, keeping that line's indentation.
    my $second;
    if ($nextline =~ s/^(\s*)([^\W\d_]+[\)\}\]\.,:;\'\"\>]*\s*)/$1/)
    {
        $second = $2;
    }

    $nextline = &getline if $nextline eq "";

    $line =~ s/\-\s*$/$second/;
    $line .= "\n";
}

# Return a copy of the string in which every tab is replaced by the number
# of spaces needed to reach the next multiple of $tab_width.
sub untabify
{
    my ($text) = @_;

    # Always expand the leftmost tab, so that the columns of later tabs
    # are measured on the already expanded text.
    while ((my $column = index($text, "\011")) >= 0)
    {
        my $padding = $tab_width - ($column % $tab_width);
        substr($text, $column, 1) = " " x $padding;
    }
    return $text;
}

# Wrap a string in a LaTeX command: "\TAG{text}", with surrounding
# whitespace removed from the text.  Braces that the tag itself opens and
# leaves open are closed as well.  An empty tag, or one containing "null",
# returns the string unchanged.
sub tagit
{
    my ($tag, $text) = @_;

    return $text unless ($tag && $tag !~ /null/);

    $text =~ s/\A\s*(.*?)\s*\Z/\\$tag\{$1/;

    # Count brackets in tag
    my $unclosed = ($tag =~ tr/{//) - ($tag =~ tr/}//);
    $text .= ("\}" x $unclosed) . "\}";

    return $text;
}

# Wrap the global $line in the given tag (see tagit) and end it with a
# newline.
sub tagline
{
    my ($tag) = @_;
    my $wrapped = &tagit($tag, $line);
    $line = $wrapped . "\n";
}

# True when the string has no lower-case ASCII letters and contains a run
# of at least $min_caps_length upper-case ASCII letters.
sub iscaps
{
    my ($text) = @_;
    # You may wish to add some chars to fit your needs
    return $text =~ /^[^a-z]*[A-Z]{$min_caps_length,}[^a-z]*$/;
}

# Tag a line written in capitals with $caps_tag, provided the next line is
# not in capitals too and the line follows a blank line or the end of a
# block.
sub caps
{
    return unless &iscaps($line);
    return if &iscaps($nextline);
    return unless (&is_blank($prev) || ($line_action & $END));

    &tagline($caps_tag);
    $line_action |= $CAPS;
}

# Handle a line that stands on its own (blank line, or end of a block,
# before it and a blank line after it) and does not end like a sentence.
# Wide gaps become \hfill; a line containing a run of dots or underscores
# gets fill commands instead, and any other line is tagged with $solo_tag.
sub solo
{
    return if &is_blank($line);
    return if ($line_action & $CAPS);
    return unless &is_blank($nextline);
    return unless (&is_blank($prev) || ($line_action & $END));
    return if ($line =~ /^\s*\\/);
    # You might want to add "?" to the list of endings below
    return if ($line =~ /([^\.]\.|[\:\;\,\!\-\'\"]|\))\s*$/);

    $line =~ s/^ *//g;
    $line =~ s/ *$//g;
    $line =~ s/ {3,}/\\hfill\{\}/g;

    if ($line =~ /(\.{4,}|\_{4,})/)
    {
        $line =~ s/\.{4,}/\\dotfill\{\}/g;
        $line =~ s/\_{4,}/\\hrulefill\{\}/g;
    }
    else
    {
        &tagline($solo_tag);
    }

    $line_action |= $SOLO;
}

# Convert very simple globs to regexps.
#
# "?" stands for any one character and "*" for any run of characters; "[",
# "]" and "|" are passed through, so character classes and alternatives
# work.  A backslash makes the following character literal.  Every other
# non-word character is matched literally.  The result is wrapped in word
# boundaries (\b).
sub glob2regexp
{
    my ($glob) = @_;

    # Escape funky chars
    $glob =~ s/([^\w\[\]\*\?\|\\])/\\$1/g;

    my $regexp  = "";
    my $escaped = 0;

    foreach my $char (split(//, $glob))
    {
        if ($escaped)
        {
            # Character after a backslash: copy it untranslated.
            $escaped = 0;
            $regexp .= $char;
            next;
        }
        if ($char eq "\\")
        {
            # Keep the backslash itself.  Dropping it would undo the
            # escaping above and turn "\*", "\?" and "." into live regexp
            # metacharacters.
            $escaped = 1;
            $regexp .= $char;
            next;
        }
        if ($char eq "?")
        {
            $regexp .= ".";
            next;
        }
        if ($char eq "*")
        {
            $regexp .= ".*";
            next;
        }
        $regexp .= $char;       # Normal character
    }

    # A glob ending in a lone backslash: make that backslash literal so it
    # cannot swallow the closing \b.
    $regexp .= "\\" if $escaped;

    return "\\b" . $regexp . "\\b";
}

# Register a rule under its regexp key.  The first rule for a key wins: a
# key that already has a (true) entry in %ruleset_table is skipped.  New
# keys are also appended to @ruleset_table_order.  Bit 1 of $dict_debug
# makes either outcome show up on STDERR.
sub add_regexp_to_ruleset_table
{
    my ($key,$short_cut,$switches) = @_;
    my $verbose = ($dict_debug & 1);

    # No sense adding a second one if it's already in there
    if ($ruleset_table{$key})
    {
        if ($verbose)
        {
            print STDERR " Skipping entry.  Key already in table.\n";
            print STDERR "\tKEY: $key\n\tVALUE: $short_cut\n\n";
        }
        return;
    }

    # Keep track of the order they were added so we can
    # look for matches in the same order
    push(@ruleset_table_order, ($key));

    $ruleset_table{$key} = $short_cut;        # Put it in The Table
    $ruleset_switch_table{$key} = $switches;

    if ($verbose)
    {
        print STDERR
            " ($#ruleset_table_order)\tKEY: $key\n\tVALUE: $short_cut\n\tSWITCHES: $switches\n\n";
    }
}

# Register a rule whose key is to be matched literally, as a whole word.
sub add_literal_to_ruleset_table
{
    my ($key,$short_cut,$switches) = @_;

    # Escape non-alphanumeric chars
    $key =~ s/(\W)/\\$1/g;

    # Make a regexp out of it
    my $pattern = "\\b$key\\b";

    &add_regexp_to_ruleset_table($pattern,$short_cut,$switches);
}

# Register a rule whose key is a simple glob (see glob2regexp).
sub add_glob_to_ruleset_table
{
    my ($key,$short_cut,$switches) = @_;
    my $pattern = &glob2regexp($key);
    &add_regexp_to_ruleset_table($pattern,$short_cut,$switches);
}

# This is the only function that you would need to change, if you were to
# use a different dictionary file format.
#
# Arguments: the dictionary's name (used in messages only) and its whole
# contents as one string.  Every "KEY -> VALUE" rule found is added to the
# ruleset table.  Option letters may be written inside the arrow
# ("--i-->"); see the switch bits below.  The key decides how it is
# matched:
#   /regexp/   or  |regexp|   a regular expression,
#   "literal"                 a literal whole word,
#   anything else             a simple glob.
# Dies when two arrows follow each other.
sub parse_dict
{
    my ($dictfile, $dict) = @_;

    print STDERR "Parsing dictionary file $dictfile\n" if ($dict_debug & 1);

    # $dict holds the whole file, so ^ and $ have to match at every line;
    # /m says so explicitly instead of relying on the obsolete $* variable.
    $dict =~ s/^\#.*$//gm;        # Strip lines that start with '#'
    $dict =~ s/^.*[^\\]:\s*$//gm; # Strip lines that end with unescaped ':'

    if($dict =~ /->\s*->/)
    {
        my $message = "Two consecutive '->'s found in $dictfile\n";

        # Print out any useful context so they can find it.
        my ($near) = $dict =~ /([\S ]*\s*->\s*->\s*\S*)/;
        $message .= "\n$near\n" if $near =~ /\S/;
        die $message;
    }

    while ($dict =~ /\s*(.+)\s+\-+([\-ieofFt]+\-+)?\>\s*(.*\S+)\s*\n/ig)
    {
        my ($key, $short_cut,$switches,$options);
        $key = $1;
        $options = $2;
        $options = "" unless defined($options);
        $short_cut = $3;
        $switches = 0;
        $switches +=  1 if $options =~ /i/; # Case insensitivity
        $switches +=  2 if $options =~ /e/; # Evaluate as Perl code
        $switches +=  4 if $options =~ /o/; # Do only once
        $switches +=  8 if $options =~ /f/; # Footnote only
        $switches += 16 if $options =~ /F/; # Footnote plus triggering text
        $switches += 32 if $options =~ /t/; # Value is a tag for the match

        $key =~ s/\s*$//; # Chop trailing whitespace

        if($key =~ m|^/|) # Regexp
        {
            $key = substr($key,1);
            $key =~ s|/$||; # Allow them to forget the closing /
            &add_regexp_to_ruleset_table($key,$short_cut,$switches);
        }
        elsif($key =~ /^\|/) # alternate regexp format
        {
            $key = substr($key,1);
            $key =~ s/\|$//; # Allow them to forget the closing |
            $key =~ s|/|\\/|g; # Escape all slashes
            &add_regexp_to_ruleset_table($key,$short_cut,$switches);
        }
        elsif ($key =~ /^\"/) # Literal
        {
            # Only a key that *starts* with a quote is a literal; a quote
            # elsewhere in a glob must not cost the key its first character.
            $key = substr($key,1);
            $key =~ s/\"$//; # Allow them to forget the closing "
            &add_literal_to_ruleset_table($key,$short_cut,$switches);
        }
        else
        {
            &add_glob_to_ruleset_table($key,$short_cut,$switches);
        }
    }
}

# Generate, compile and return the Perl source of the sub
# dynamic_make_dictionary_ruleset, which applies every rule in the ruleset
# table to the global $line.  One while-loop is emitted per rule, in the
# order the rules were added.  Dies if the generated code does not compile.
# Bit 2 of $dict_debug prints the generated code to STDERR.
sub make_dictionary_ruleset_code
{
    my ($i,$pattern,$switches,$options,$code,$rule_item);
    # Opening lines of the generated sub.
    $code = <<EOCode;
sub dynamic_make_dictionary_ruleset
{
    my (\$line_ruleset) = (\$line_action | \$RULESET);
    my (\$before,\$rulesetme,\$line_with_ruleset);
EOCode
    # Rules are taken from index 1 on; slot 0 of @ruleset_table_order is
    # skipped (load_dictionary_ruleset seeds the array with a single 0).
    for ($i=1; $i <= $#ruleset_table_order; $i++)
    {
	$pattern = $ruleset_table_order[$i];
	$key = $pattern;
	$switches = $ruleset_switch_table{$key};
	
	$s_sw = "";		# Options for searching
	$s_sw .= "i" if ($switches & 1);
	
	$r_sw = "";		# Options for replacing
	$r_sw .= "i" if ($switches & 1);
	$r_sw .= "e" if ($switches & 2);

	$rule_item = $ruleset_table{$key};

	# The replacement is placed between "/" delimiters in the generated
	# substitution, so slashes inside it are escaped.
	$rule_item =~ s@/@\\/@g;

	# Switch 32: the rule's value is used as a tag wrapped around the
	# first capture ($1) of the pattern.
	if ($switches & 32)
	{
	    $rule_item =~ s/^\s*//;
	    $rule_item = &tagit($rule_item, '$1');	
	    $rule_item =~ s/(\\|\{|\})/\\$1/g;
	}

	# Switch 8: the value replaces the match as a footnote.
	# Switch 16: the match is kept and the footnote is added after it.
	$rule_item = '\\\\footnote{' . $rule_item . '}' if ($switches & 8);
	$rule_item = '$&' . '\\\\footnote{' . $rule_item . '}'
	    if ($switches & 16);
	
	$code .= "    \$line_with_ruleset = \"\";";
        if($switches & 4) # Do ruleset only once
        {
	    $code .= "
    while(!\$done_with_ruleset[$i] && \$line =~ /$pattern/$s_sw)
    {
        \$done_with_ruleset[$i] = 1;
";
	}
	else
	{
            $code .= "\n    while(\$line =~ /$pattern/$s_sw)\n    {";
        }
	# Loop body: the generated code moves the text before the match and
	# the rewritten match into \$line_with_ruleset and carries on with
	# the rest of the line, so replaced text is not scanned again by the
	# same rule.
	$code .= <<EOCode;
	\$ruleset_line = $RULESET if(!\$ruleset_line);
	\$before = \$\`;
	\$rulesetme = \$&;
	
	\$line = substr(\$line, length(\$before) + length(\$rulesetme));
	\$rulesetme =~ s/$pattern/$rule_item/$r_sw;
	\$line_with_ruleset .= \$before . \$rulesetme;
    }
    \$line = \$line_with_ruleset . \$line;
EOCode
    }
    # Closing lines of the generated sub.
    $code .= <<EOCode;

    \$line_action |= \$line_ruleset; # Cheaper only to do bitwise OR once.
}
EOCode
    print STDERR "$code" if ($dict_debug & 2);
    # Compile the generated sub into the running program.
    eval "$code";
    if($@)
    {
        print STDERR "Problem making dictionary eval code\n";
        die $@;
    }
    $code;
}

# Read every dictionary named in @ruleset_dictionaries, feed its contents
# to parse_dict, and then build the rule-applying code.
#
# Each name is tried as given, then with "-ruleset" appended, then as
# "$HOME/.txt2tex-NAME".  Dies if none of the three can be opened.
sub load_dictionary_ruleset
{
    # Slot 0 is a placeholder: rules are numbered from 1.
    @ruleset_table_order = 0;
    %ruleset_table = ();

    foreach my $dict (@ruleset_dictionaries)
    {
        next unless $dict;
        my $last_try = "$ENV{'HOME'}/.txt2tex-$dict";

        # Three-argument open: the name is always treated as a plain file
        # to read, even if it starts with ">" or contains a "|".
        my $fh;
        open($fh, '<', $dict)
            || open($fh, '<', "$dict-ruleset")
            || open($fh, '<', $last_try)
            || die "Can't open Dictionary file $dict\n";

        # Slurp the file without disturbing the global $_.
        my $contents = "";
        while (defined(my $row = <$fh>))
        {
            $contents .= $row;
        }
        close($fh);

        &parse_dict($dict, $contents);
    }
    &make_dictionary_ruleset_code;
}

# Apply the dictionary rules to the current line by running the generated
# sub dynamic_make_dictionary_ruleset.  A failure inside it is reported as
# a warning and does not stop the program.
sub make_dictionary_ruleset
{
    my $call = "&dynamic_make_dictionary_ruleset;";
    eval $call;
    if ($@)
    {
        warn $@;
    }
}

# Read the next input line (from the files on the command line or STDIN)
# and normalise it: trailing blanks are chopped as described below, tabs
# are expanded, and the --trim setting is applied.  Returns "" at end of
# input.
sub getline
{
    my $line = <>;
    $line = "" unless defined ($line);

    # NOTE(review): when $mac is false, trailing spaces/tabs are removed
    # only together with a DOS carriage return; when it is true they are
    # removed unconditionally.
    if (!$mac)                         # Chop trailing whitespace and DOS CRs
    {
        $line =~ s/[ \011]*\015$//;
    }
    else
    {
        $line =~ s/[ \011]*$//;
    }

    $line = &untabify($line);           # Change all tabs to spaces
    if ($trim =~ /^\d+$/)
    {
        $line =~ s/^.{$trim}//;         # Trim lines if requested by --trim
    }
    elsif (defined($trim) && $trim ne "")
    {
        # Assume $trim is a regexp.  An empty $trim must not get here: an
        # empty pattern makes Perl reuse the last successfully matched
        # regexp, which would delete arbitrary text.
        $line =~ s/$trim//g;
    }
    $line;
}

# The converter proper.
#
#   1. Load the ruleset dictionaries and process the command-line options.
#   2. Read ahead so that three lines are always at hand: $prev (finished,
#      waiting to be printed), $line (being worked on) and $nextline.
#   3. Unless only an extract is wanted, print the LaTeX preamble.
#   4. Run every line through the formatting passes and print $prev.
#   5. Close whatever is still open and end the document.
sub main
{
    # Turn on multiline searches on perls that still implement $*.  The
    # variable has had no effect since perl 5.10, and merely mentioning it
    # is a fatal compile-time error from perl 5.30 on, so the assignment is
    # kept inside a string that only older perls ever compile.
    eval q{$* = 1;} if $] < 5.010;

    push(@ruleset_dictionaries,($default_ruleset_dict))
        if ($make_ruleset && (-f $default_ruleset_dict));
    &deal_with_options;
    if($make_ruleset)
    {
        push(@ruleset_dictionaries,($system_ruleset_dict))
            if -f $system_ruleset_dict;
        &load_dictionary_ruleset;
    }

    $non_header_label = 0;

    # Moved this way up here so we can grab the first line and use it
    # as the title (if --titlefirst is set)
    $mode = 0;
    $listnum = 0;
    $list_indent = "";
    $line_action = $NONE;
    $prev_action = $NONE;
    $prev_line_length = 0;
    $prev_indent = 0;
    $prev = "";
    $line = &getline;
    $nextline = 0;
    $nextline = &getline if $line;

    # Skip leading blank lines
    while (&is_blank($line) && $line)
    {
        $prev = $line;
        $line = $nextline;
        $nextline = &getline if $nextline;
    }

    if (!$extract)
    {
        print "$doctype\n\n" unless !$doctype;

        print "$usepackages\n" if $usepackages;

        print "\\typeout\{Using LaTeX hooks with prefix ``$latexhook''.}\n"
            if $latexhookmode;
        &hook("$latexhook-HeadB", "", "No HeadB")
            if ($latexhookmode & 1);

        # if --titlefirst is set and --title isn't, use the first line
        # as the title.
        if ($titlefirst && !$title)
        {
            ($title) = $line =~ /^ *(.*)/; # grab first line
            $title =~ s/ *$//; # strip trailing whitespace
            $line = "";
        }

        if ($batchmode)
        {
            print "\\batchmode\n\n";
        }


        if ($nopagenumbers)
        {
            print "\\pagestyle\{empty\}\n\n";
        }

        # Setting the margins as below will give 1in margins for
        # any paper size.

        if ($smallmargins & 1)  # x margins
        {
            print "\\setlength\{\\oddsidemargin\}\{0in\}\n";
            print "\\setlength\{\\textwidth\}\{\\paperwidth\}\n";
            print "\\addtolength\{\\textwidth\}\{-2in\}\n";
            print "\\setlength\{\\marginparwidth\}\{.7in\}\n\n";
        }

        if ($smallmargins & 2)  # y margins
        {
            print "\\setlength\{\\headheight\}\{0in\}\n";
            print "\\setlength\{\\headsep\}\{0in\}\n";
            print "\\setlength\{\\textheight\}\{\\paperheight\}\n";
            print "\\addtolength\{\\textheight\}\{-2in\}\n";
            print "\\setlength\{\\topmargin\}\{0in\}\n\n";
        }

        if ($samepageverbatim || ($escapemode & $ESCVERT))
        {
            print "\\let\\olddospecials=\\dospecials\n";
            print "\\def\\dospecials\{$samepageverbatim";
            print "\\catcode\`\|=11 "
                if ($escapemode & $ESCVERT);
            print "\\olddospecials\}\n\n";
        }

        print "\\let\\leftangle=<\n"    if ($escapemode & $ESCANGLES);
        print "\\let\\rightangle=>\n"   if ($escapemode & $ESCANGLES);
        print "\\let\\doublequote=\"\n" if ($escapemode & $ESCDQUOTE);
        print "\\let\\dollar=\\\$\n"    if ($escapemode & $ESCDOLLARS);

        print "\\catcode\`\\\_=\\active\n" if ($escapemode & $ESCSCRIPTS);
        print "\\catcode\`\\<=\\active\n"  if ($escapemode & $ESCANGLES);
        print "\\catcode\`\\\>=\\active\n" if ($escapemode & $ESCANGLES);
        print "\\catcode\`\\\|=\\active\n" if ($escapemode & $ESCVERT);
        print "\\catcode\`\\\"=\\active\n" if ($escapemode & $ESCDQUOTE);
        print "\\catcode\`\\\$=\\active\n" if ($escapemode & $ESCDOLLARS);

        # We use mboxes to avoid errors if equations are added to the source,
        # and it works in old versions of LaTeX too.

        print "\\def<\{\\mbox\{\\\(\\leftangle\\\)\}\}\n"
            if ($escapemode & $ESCANGLES);
        print "\\def>\{\\mbox\{\\\(\\rightangle\\\)\}\}\n"
            if ($escapemode & $ESCANGLES);
        print "\\def\|\{\\mbox\{\\\(\\mid\\\)\}\}\n"
            if ($escapemode & $ESCVERT);
        print "\\def\"\{\\mbox\{\\texttt\{\\doublequote\}\}\}\n"
            if ($escapemode & $ESCDQUOTE);
        print "\\def\$\{\\dollar}\n"
            if ($escapemode & $ESCDOLLARS);

        print "\\catcode\`\\\%=11\n" if ($escapemode & $ESCPERCENT);
        print "\\catcode\`\\\#=11\n" if ($escapemode & $ESCNUM);
        print "\\catcode\`\\\&=11\n" if ($escapemode & $ESCAND);
        print "\\catcode\`\\\^=11\n" if ($escapemode & $ESCSCRIPTS);
        print "\\catcode\`\\\~=11\n" if ($escapemode & $ESCTILDA);
        print "\n"
            if $escapemode & ($ESCPERCENT|$ESCNUM|$ESCAND|$ESCSCRIPTS
                              |$ESCTILDA);

        print "\\setlength\{\\parindent\}\{0em\}\n";
        print "\\setlength\{\\parskip\}\{2ex\}\n\n";

        &hook("$latexhook-HeadE", "", "No HeadE")
            if ($latexhookmode & 2);

        print "\\begin\{document\}\n\n";

        if ($title)
        {
            $title = &tagit($titletag, $title);
            print "$title\\vskip 4ex\n\n";
        }
    }

    &hook("$latexhook-BodyB", "", "No BodyB")
        if ($latexhookmode & 4);

    print "\\sloppy\n\n" if $sloppy;

    unless($extract)
    {
        print "\\iffalse\n";
        print "Converted from plain text with txt2tex $version\n";
        $mytime = gmtime(time);
        $mytime =~ s/\s+/ /g;
        $mytime =~ s/\:\d\d / /;
        print "on $mytime GMT.\n\n".'$Id: txt2tex.pl 4051 2007-07-01 07:38:34Z kalvis $'."\n\\fi\n";

    }

    $clearlines = 1;

    # Main loop: one pass per input line.  $line is formatted here, $prev
    # (the line formatted on the previous pass) is printed.
    do
    {
        if (!$ruleset_only)
        {
            $line_length = length($line); # Do this before tags are added
            $line_indent = &count_indent($line, $prev_indent);

            &endpreformat
                if (($mode & $VERB) && ($preformat_trigger_lines != 0));

            &preformat
                if (!($line_action & ($HEADER | $LIST | $MAILHEADER))
                    && !($mode & ($VERB | $LIST))
                    && !($prev_action & $MAILHEADER)
                    && ($endpreformat_trigger_lines != 0));

            if (!($mode & $VERB))
            {
                &escape if ($escapemode & $ESCBSLASH);

                &unhyphenate
                    if ($unhyphenation && ($line =~ /[^\W\d_]\-$/) #end hyphen
                        && ($nextline =~ /^\s*[^\W\d_]/) # starts with letters
                        && !($mode & ($HEADER | $MAILHEADER | $BREAK)));

                &mailstuff if ($mailmode && !($line_action & $HEADER));

                &hrule;

                &custom_heading if (($#custom_heading_regexp > -1)
                                    && !($line_action & $HRULE));

                &heading if (!$explicitheadings
                             && !($line_action & ($HRULE | $HEADER))
                             && $nextline =~ /^\s*[=\-\*\.~\+\^]+\s*$/);

                # Maybe we should add $MAILHEADER to the list below.
                &liststuff
                    if (!&is_blank($line)
                        && !($line_action & ($HRULE | $HEADER)));

                # Give preformat another chance now we have tried lists
                # You can remove the following block with little loss.
                &preformat
                    if (($line_action & $END)
                        && !($line_action & ($HEADER | $LIST | $MAILHEADER
                                             | $HRULE))
                        && !($mode & $LIST) && !($prev_action & $MAILHEADER)
                        && ($endpreformat_trigger_lines != 0));
            }

            if (!($mode & $VERB))
            {
                &paragraph unless ($prev_action & $BREAK);

                &shortline unless (($mode & $CAPS)
                                   || ($line_action & $PAR));

                &caps if $caps_tag;

                &solo if ($solo_tag && !($mode & $LIST)
                          && !($line_action & ($HEADER|$MAILHEADER|$HRULE)));
            }
        }

        if (!($mode & $VERB))
        {
            &make_dictionary_ruleset              # Trakgalvis removal
                if ($make_ruleset                 # && !&is_blank($line)
                    && $#ruleset_table_order);

            &tidy;

            # All the matching and formatting is done.  Now we can
            # replace non-ASCII characters with character entities.

            @chars = split(//,$line);
            foreach $_ (@chars)
            {
                $_ = $char_entities{$_} if defined($char_entities{$_});
            }

            $line = join("", @chars);
        }

        # Print it out and move on, but avoid printing too much white space
        # to the LaTeX file.

        if ($prev !~ /\A\s*\Z/)
        {
            print "\n" if $clearlines ;
            print $prev unless ($prev_action & $MAILHEADER)
                && ($mailmode & $MAILBODY);

            if (($line_action & ($SOLO | $CAPS | $HEADER | $HRULE
                                 | $PAR | $END))
                || ($prev_action & ($HEADER | $SOLO | $CAPS | $HRULE)))
                # The last item ensures a blank line after such events.
                # With current settings, we do not need ``$SOLO'' here,
                # but I often forget to put it in for non-standard runs.
            {
                $clearlines = 1;
            }
            else
            {
                $clearlines = 0;
            }
        }
        else
        {
            $clearlines++;
        }


        # The per-line state is only carried over when the next line is
        # not blank.
        if (!&is_blank($nextline))
        {
            $prev_action      = $line_action;
            $line_action      = $NONE;
            $prev_line_length = $line_length;
            $prev_indent      = $line_indent;
        }

        $prev = $line;
        $line = $nextline;
        $nextline = &getline if $nextline;
    }
    until (!$nextline && !$line && !$prev);

    $prev = "";
    &endlist($listnum) if ($mode & $LIST); # End all lists
    print $prev;

    print "\n" unless ($mode & $VERB);

    print "\\end\{verbatim\}\n\n" if ($mode & $VERB);

    &hook("$latexhook-BodyE", "", "No BodyE")
        if ($latexhookmode & 8);

    print "\\end\{document\}\n" if (!$extract);
}

# Program entry point: run the conversion.
main();

__END__
#h
#h Option sets
#h ~~~~~~~~~~~
#h
#h Below the ``__END__'' in txt2tex you can put lists of command line
#h options after a ``keyword''; these can then be loaded by putting
#h ``-sw keyword'' on the command line.  Note that ``\'' is a continuation
#h character, so long options can be put on several lines.  These include:
#h
#h * remind --- turns the output of the unix remind program into nice LaTeX;
#h call remind using ``rem -n |sort''.
remind -ec 1919 -sm -nv -s 100 -t Appointments -np -tt section*{\today:~~ \
-de \d\d\d\d.(\d\d.\d\d(\s*\d\d.\d\d)?)

#h * num --- picks out simple numbered headings.
num -H ^\s*\d+\.\s -H ^\s*\d+\.\d+\.\s -H ^\s*\d+\.\d+\.\d+\.\s -ns

#h * n --- a variant of the above.
n -H ^\s*\d+\.\s -H ^\s*\d+\.\d+\.\s -H ^\s*\d+\.\d+\.\d+\.\s -TH (\d+\.)+\s+

ind -H ^\s{2}[^\d\s\*] -H ^\s{4}[^\d\s\*] -H ^\s{6}[^\d\s\*]

#h * plain --- a very plain style, which is good for university work!
plain +rs -ec -ns -np -ct off -st off

#h * trim --- removes leading spaces before txt2tex processes the line.
trim -tr ^[^\S\n]*

#h * lynx --- for lynx browser output.
lynx +rs -sm 1 -tr \[.*?\]-? -ec -ss 1ex -H ^\w
ltc +rs -sm -tr \[.*?\]-? -ec -ss 1ex -H ^\w -tc -11 -ns -tt section* -nv
lyn +rs -sm 1 -tr \[.*?\]-? -ec -ss 1ex

#h * noL --- normally \014 produces a LaTeX ``clearpage'', but this option
#h removes \014 before txt2tex sees the line.
noL -tr \014

#h * HH --- this is what I use to print the ``Happy Hacker'' newsletter.
HH -ec -r off -ns -s 60 +rs

#h * man --- useful for dealing with unix man pages, but could be better!
man -tr .\010 -bm -ec +rs -pb 3 -pe 3 -p 20,6,2 -11pt -sm -sv -pi 10
grof -tr .\010

#h * pagesec --- each new section starts on a new page.
pagesec -HT clearpage\section,subsection,subsubsection,paragraph,subparagraph

#h * pagesubsec --- each subsection starts on a new page.
pagesubsec -HT clearpage\section,clearpage\subsection,subsubsection,paragraph

#h * slides --- turns plain text into (very) simple slides.  You might also
#h want to set ``noverbatim''.  Note that many of the standard options will
#h not work with switch ``slides'' set.
slides -ec -sloppy +rs -dt \documentclass{slides} -ct textbf -st textbf \
-HT clearpage\textbf{\centerline{\Large,textbf{\Large,textbf{\large,textbf \
-up times

#h * handout --- used for student handouts.
handout -ec 0 +rs -sm -ns -HT subsection,subsubsection,subsubsubsection \
-ct subsection* -st subsection*

#h * letter --- used for writing letters, but you need to define your own
#h letter-hook files with your address etc.
letter -d -ec -dt \documentclass[12pt]{letter} -lh letter -ct textbf -st off \
-HT textbf{\large,textbf{\emph +rs -rs letter -lh 9

#h * preview --- not for LaTeXing, but marks up the file in a manner to show
#h you what txt2tex was thinking; this can help in choosing the right tags
#h etc. for the print run.  It can be followed by other options, so you can
#h see how that changes the mark up.  It is also useful for debugging, but that
#h is probably my job [:-)]
preview -e -ec -ct Caps -st Solo -HT HeadOne,HeadTwo,HeadThree,HeadFour \
-ss 1ShortLine -t Title -tt TitleTag

#h * dict --- turns a list of the form `word: text' into a LaTeX description
#h environment.
dict -de (\w+?\:)
#h * phone --- turns a list of the form `phrase: text' into a LaTeX description
#h environment.  I use this for a personal phone book.
phone -de (.*\:) +rs -11 -sm
# ``word space word'' or ``word'' with more than two letters.
di -de (((\w+)\s*){2}\:)
#h * fn --- turns fancy numbered lists, with numbers like 1.1.1, into LaTeX
#h description environments.  Often useful for printing contracts off the net!
fn -pi 10 -de ((\d+\.)+(\d+)?) +rs
#h * lpr --- used as part of a fancy plain text printer filter.
lpr -d -ec -sm +rs -nv -ns -np -r off
lpr2 -d -ec -sm +rs -nv -ns -np -r off -tc
#h * lpn --- used by the Lockpicker Network.
lpn -tf -st off -de \*\s+(\w+)\s+\-\-\- -d -ec -sm +rs -nv -ns -r off -11
#h * netrc --- used to print a .netrc file.
netrc -d -sm -de machine(.*)login 
# Used by the ``--demo'' option.
demo -bm -ns -ec -tc -10 -de ^\s\*\s(\S+) +rs -sm -sv 
AB -de \*.* -t Addresses -sm -s 1000 -tc -nv -d

# Used to print a single simple page
one +rs -nv -ns -np -r off

# Used for my weather reports
laiks  -pb 1 -tf -np 

#===========================================================================
#H
#H A sample ruleset
#H ~~~~~~~~~~~~~~~~
#H
#H Txt2tex by default tries to load a file called ``.txt2tex-ruleset'' from
#H your home directory (assuming you are using a unix system).  This file, if
#H it exists, contains transformation rules that are executed AFTER all other
#H txt2tex subroutines with the exception of ``tidy'' (which does a little
#H cleaning up) and the escaping of ``funny'' characters. Strange behaviour
#H can result from not keeping the time of execution in mind.
#h
#h I most often use ``rulesets'' for writing my own documents in plain text, to
#h be transformed later by txt2tex into LaTeX.  So let us look at rules
#h that help in such tasks.  Each rule must be on a single line in the ruleset
#h file.
#H
#H /<<(.*?)>>/ -f-> $1
#h
#h The ``-f->'' type rule, when the regular expression on the left matches,
#h takes the expression on the right and turns it into a footnote, then
#h removes the triggering text.  So the above example transforms
#h ``Kalvis M. Jansons<<Mathematics, UCL>>'' into
#h ``Kalvis M. Jansons\footnote{Mathematics, UCL}'' in the LaTeX output.
#H
#H Kalvis M. Jansons -Fo-> Email: kalvis\@jansons.org
#h
#h The ``-F->'' type rules are the same as the ``-f->'' ones, but do not
#h remove the triggering text.  So the above rule adds a footnote with my email
#h address to my name.  So that this happens once only per document, I have
#h added the ``o'' (for once) in the rule.
#H
#H /txt2tex/ -oi-> TXT2TeX \\emph{(written by Kalvis)}
#H
#H /pheonix/ ---> phoenix
#h
#h The above rules are simple transformations.  The first is case insensitive
#h (hence the ``i'') and is executed once only (hence the ``o'').  The second
#h corrects a common spelling error (every time it occurs).
#H
#H /tagad/ -ie-> my $time = localtime(time); $time =~ s/\:\d\d\s.*//; $time
#h
#h The ``e'' option means evaluate the righthand side as a perl expression.
#h So the above expression turns ``tagad'' (the Latvian for ``now'') into the
#h current date and time (and removes ``tagad'').  The ``e'' option can also
#h be used to change the value of txt2tex parameters while running, by setting
#h them when certain patterns are first encountered.
#H
#H /\*([a-z][a-z ]*[a-z])\*/ -ti-> emph
#H
#H /\*([a-z])\*/ -ti-> emph
#h
#h The ``t'' option is used to tag the text in (), and so leads to a shorter
#h rule than could be obtained using the above rules to do this job.
#h The above rules put any sequence of letters and spaces which are between
#h two stars in the LaTeX ``emph'' style.  This use of ``*'' is often seen
#h in plain text ``readme'' files.
#H
#H /<\*(.*?)\*>/ -tfi-> textbf
#H
#H Putting a few bits together, we can turn anything in <* ... *> into a 
#H ``textbf'' footnote, but I am sure you can think of a better application.
#h
#h Saving the sample ruleset
#h .........................
#h
#h If you want to save this sample ruleset to adapt for your own use, type
#h ``txt2tex -sampleruleset > ~/.txt2tex-ruleset'',
#h
#h or direct it into a different file if you do not want it to be the default.
#h
#h Getting help
#h ~~~~~~~~~~~~
#h
#h Please contact me (Kalvis) with any problems or suggestions.
#h
#h Bugs
#h ~~~~
#h
#h Send any bug reports to me, and I will do my best to fix them, but note that
#h there is a limit to what txt2tex can be expected to do on poorly formatted
#h text files.  For such files, it is often better to fix the worst features
#h before giving them to txt2tex; then there should be no need to do much
#h work, if any, on the LaTeX file produced.
#h
#h Ensure that you are using the latest version, which can be obtained from
#h any CTAN site.
#h
#h Kalvis@Jansons.org

# $Id: txt2tex.pl 4051 2007-07-01 07:38:34Z kalvis $

# ``Kalvis'' is the name of a Baltic God; a ``magical'' blacksmith who created
# the Sun and placed Her in the heavens.