#!/usr/bin/env texlua

-- texlogsieve - filter and summarize LaTeX log files
--
-- Copyright (C) 2021-2025 Nelson Lago <lago@ime.usp.br>
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation, either version 3 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program.  If not, see <https://www.gnu.org/licenses/>.
--
-- Code etc: https://gitlab.com/lago/texlogsieve

--[[

----------------
THE TeX LOG FILE
----------------

TeX uses three low-level commands for logging:

 1. \message -> outputs to both the console and the log file (most
    low-level TeX messages, such as open/close files, shipouts etc.
    behave *more or less* as if they were generated by \message)

 2. \write with an unallocated stream identifier (typically, 0) ->
    also outputs to both the console and the log file.

 3. \write with a negative stream identifier (typically, -1) -> outputs
    to the log file only

\write and \message behave differently:

 * Normally, \write appends a line feed character (LF) to the text, so
   a sequence of \write commands results in multiple lines.

 * \message checks whether the last thing that was sent out was another
   \message; if so, it adds a space character and outputs the text (both
   the previous text and the new one are on the same line, separated by
   a space), otherwise it just outputs the text (in this case, the new
   text is at the beginning of a line). Note, however, that there are
   some \message's that are not separated by spaces, such as "))".
   Also, in most cases (I could not figure out when this fails, but it
   happens often), if \message realizes the new text will not fit in
   the current line (the line would exceed max_print_line characters),
   instead of wrapping the line as usual, it may output a LF and start
   the text on a new line.

 * \write also checks if the last thing that was sent out was a message;
   if so, it sends LF before the text, so that it starts at the
   beginning of a line (it also sends LF after the text too, as always)

Therefore, in the console, text output with \write always appears on a
new line. A sequence of calls to \message also always starts on a new
line, but the texts of all of them appear on the same line, separated
by spaces.

However, things get messy in the log file. Basically, \message and
\write0 modify the same filedescriptor, while \write-1 modifies a
different filedescriptor. This means \message and \write0 are unaware
of \write-1 and vice-versa. As a result, the spaces and LFs that are
added according to what is output to the console get mixed up with what
is written only to the log file with \write-1. Therefore, there may be
unexpected empty lines or lines that start with a space character in
the log file.

The LaTeX command \GenericInfo uses \write-1, while \GenericWarning
uses \write0. TeX and LaTeX also define \wlog, which is an alias for
\write-1; LaTeX defines \typeout, which is an alias for \write0. Some
packages define their own aliases; for example, pgfcore.code.tex does
\def\pgf@typeout{\immediate\write0}, graphics.sty does \let\Gin@log\wlog,
etc. Package infwarerr provides a compatibility layer for the LaTeX
standard logging commands, so that they can be used both in LaTeX and
in plain TeX, as \@PackageInfo, \@ClassWarning etc.

With that in mind, we will consider that there are five kinds of message
in the LaTeX log file:

 * Ordinary messages -> messages that start at the beginning of the line
   and end at the end of the line (they are created with \write), such as

       Document Class: book 2019/08/27 v1.4j Standard LaTeX document class

       For additional information on amsmath, use the `?' option.

       \openout4 = `somefile'.

 * Short messages -> messages that may begin/end anywhere on a line,
   because the line they are in may contain multiple messages of
   this type (they are created with \message), such as

       ASCII Hyphenation patterns for American English

       ) (/usr/share/texlive/texmf-dist/tex/generic/iftex/iftex.sty
       -> these mean 'close last file' and 'open file .../iftex.sty'

       (/usr/share/texlive/texmf-dist/tex/latex/etoolbox/etoolbox.sty)
       -> these mean 'open file .../etoolbox.sty' and 'close last file'
          (which is etoolbox.sty, obviously)

       ))
       -> these mean 'close the last two files'

       [1] [2] [3]
       -> these mean 'shipout pages 1, 2, and 3'

 * Multiline messages -> messages that start at the beginning of a line,
   continue for a few lines and end at the end of a line (the programmer
   explicitly used multiple \write commands, a single \write command
   with embedded newline characters, or single/multiple \message
   commands with leading/trailing newline characters), such as

       Package biblatex Info: Automatic encoding selection.
       (biblatex)             Assuming data encoding 'utf8'.

       *************************************
       * Using libertinus math *
       *************************************

       **********************
       * No checksum found! *
       **********************
       (from ydoc-doc.sty)

 * "Best-effort" multiline messages -> similar to the previous ones,
   but created using multiple \message commands or a single one with
   embedded newline characters but no leading and/or trailing newline
   characters. They *usually* begin at the beginning of the line and
   end at the end of the line because, as mentioned before, TeX usually
   inserts a line break if the line would not fit otherwise. However,
   that is not always true, especially if max_print_line is large.
   Therefore, sometimes the message may begin in the middle of a line
   and/or the various lines may be strung together or wrapped.
   Examples:

       *****************************************************************
         GFS-Solomos style file by A. Tsolomitis
       *****************************************************************
       (from gfssolomos.sty)

       =============== Entering putframe ====================
       \pagegoal =635.97621pt, \pagetotal =368.07768pt.
       (from exam.cls)

 * Error messages -> when an error occurs, TeX usually writes something
   like "! Some error" followed by lines that resemble a stack trace of
   where the error occurred. For example, this input:

       Some text \blah, something else

   would result in

       ! Undefined control sequence.
       l.5 Some text \blah
                          , something else

   With option -file-line-error, the exclamation is replaced by an
   indication like "filename:linenum:".

   The "stack trace" is comprised of pairs of lines. In each pair,
   the first line indicates the content in a given line leading up
   to the error, while the second one shows the subsequent content of
   that line. The first line is at most half_error_line characters long
   (default 50) and the second line is at most error_line characters
   long (default 79). The second line is indented to start after the
   end of the first, as in the example above. If the content of either
   segment does not fit, a part of it (the beginning for the first line
   and the end for the second line) may be replaced by "...". error_line
   must be < 255 and half_error_line must be < error_line -15. If
   error_line >= max_print_line or half_error_line >= max_print_line,
   TeX does line wrapping as usual (see below). If either is exactly
   max_print_line (which is true for the default values), things get
   confusing, so TeX may add a blank line when wrapping.

   By default, LaTeX only shows the first and last pairs of lines of the
   "stack trace" (if only one pair of lines is relevant, then obviously
   only this pair is shown). The number of additional, intermediate pairs
   of lines shown is determined by \errorcontextlines: if it is zero, no
   additional lines are shown, but LaTeX prints "..." indicating they
   were omitted. If it is -1 (the default), they are not shown and there
   is no "..." indication.

   LaTeX errors are similar, but usually follow the format

       ! LaTeX/Package/Class BLAH Error: some description
       See the BLAH documentation for explanation.
       Type H <return> for immediate help.
        ...
       l.5 First error line
                           second error line

   In errorstop mode, such errors are followed by a "?" prompt.

   Finally, runaway arguments have a different format:

       Runaway argument?
       some text\ETC.
       ! SOMETHING ended...

To complicate things, TeX by default wraps (breaks) any line longer
than max_print_line characters (by default, 79). Since some messages
may be output together on a single line, even short messages may be
broken across lines. At the same time, there are quite a few ordinary
lines that in practice happen to be max_print_line characters long,
which makes detecting line wrapping a real challenge. And, just to
make things more interesting, sometimes there is a blank line between
the wrapped line and its continuation line.

pdfTeX (and, I suppose, traditional TeX) counts characters as bytes to
choose where to wrap a line. XeTeX, however, counts utf-8 characters
(I do not know whether that's code points or graphemes), so we need to
take that into consideration.

LuaTeX adds some extra complications: for no apparent reason, it wraps
some lines at max_print_line characters and others at max_print_line +1
characters. It also sometimes "forgets" to wrap a line. And more! It
does not break a line in the middle of a multibyte UTF-8 character.
That is obviously a good idea, but it counts the line length in bytes,
like pdfTeX, which means some lines may be broken at lengths smaller
than max_print_line. While this may seem rare, it can happen when parts
of the document text are included in the log, as is the case with
over/underfull box messages.

So, if at all possible, it is a very good idea to set max_print_line
to a really large value (such as 100,000), effectively disabling line
wrapping. It was useful in the 1980s, but not anymore (your terminal or
editor wraps automatically). Likewise, error_line and half_error_line
should be, respectively, 254 and 238 (more about these values here:
https://tex.stackexchange.com/a/525972).


----------------------
HOW THIS PROGRAM WORKS
----------------------

To read this section, pretend for a moment that TeX does not wrap long
lines.

We have a set of handlers, i.e., objects that process specific kinds
of message. Each handler checks if a given line matches some patterns
to decide whether it is supposed to process it (return true) or not
(return false).

There is a loop that, at each iteration, reads a new line from the log
file with moreData() and uses chooseHandler() to call each handler in
turn until one of them processes that line. After the right handler
does its thing, it sends the message to the output, erases this input
line and returns true, which causes the loop to restart, skipping the
remaining handlers.

The loop may behave a little differently in two cases:

 1. If the handler processes just a part of the input line, which may
    happen if the line contains multiple short messages, it removes
    the processed data from the line and leaves the rest. At the next
    iteration, no new data is read: the loop processes the remaining
    material from the same line.

 2. If the handler processes the input and expects specific lines of
    content to follow, it can preset itself as the next handler,
    temporarily bypassing the choice step.

When the line (or part of it) is not recognized by any handler, it is
appended to unrecognizedBuffer. We may sometimes concatenate multiple
text chunks together here because, during processing, we may break the
line into chunks when in fact it comprises a single message. In any
case, we know for sure that the message (or messages) currently in
unrecognizedBuffer is complete when either (1) we identify the next
short message in the line or (2) we proceed to the next input line.
When either happens, we send the buffer to the output.

We know most messages, but not all, start at the beginning of a line.
Therefore, we keep two sets of handlers: beginningOfLineHandlers and
anywhereHandlers. We use the boolean atBeginningOfLine to know when
we may skip trying the beginningOfLineHandlers.

We mentioned that the handler "sends the message to the output". The
handler actually creates a Message object and calls the dispatch()
function to send that object to a coroutine that handles output. The
coroutine aggregates messages by page, generates summaries for some
kinds of message (for example, it may list all undefined references
together) and prints out the report in different formats.

Sometimes we need to be able to check the content of the next line
to decide what to do about the current line (for example, different
messages may begin with a line like "**********"). So, we do not only
read one line from the input at each iteration; instead, we have a
buffer with the next few lines. When the current line has been
completely processed, moreData() simply refills the buffer and calls
Lines:gotoNextLine(), which makes the next line (line 1) become the
current line (line 0), the following line (line 2) become the next
line (line 1) etc.

That would be all if TeX did not wrap long lines, but it does. To unwrap
lines when needed, we (1) check that the line is max_print_line long;
if so, we (2) check whether the next line is the beginning of a known
message; if it is not, we (3) check whether unwrapping lines makes us
recognize a message that was not identified without unwrapping. Because
of 3, we do this in each of the handlers and not beforehand. To unwrap
a line, we simply join the current line with the next one from the
buffer.

Note that, if you reconfigure the variable max_print_line to a value
larger than 9999 (which is a good thing to do), this program assumes
(quite reasonably) that there is no line wrapping.


------------
THE HANDLERS
------------

We want to be able to explicitly recognize as many messages as possible
(and leave the least amount possible for the unrecognizedBuffer) for at
least three reasons:

 1. A character such as "(", ")", "[", or "]" in an unrecognized message
    may confuse the program (these chars may indicate open/close file
    and begin/end shipout). If it is part of a known message, it will
    not be mistaken for an open/close file etc.

 2. By default, TeX wraps lines longer than max_print_line characters,
    and unwrapping them involves making sure that the following line
    is not the start of a new message. For this to be reliable,
    unknown messages should be kept to a minimum.

 3. We can assign severity levels to known messages; unknown messages
    must always be handled as high-severity, polluting the output.

At each iteration, chooseHandler() calls doit() for every handler. The
handler returns true to signal that chooseHandler() should proceed to
the next iteration. It does that if it did "something" to change the
status for the next iteration:

 * It processed a complete message in this iteration (i.e., the whole
   message was contained in the current line);

 * It processed the message partially and defined nextHandler;

 * It finalized the processing of a previous partial message (i.e., it
   realized the message has ended). This only happens if the handler
   was previously set as nextHandler. When this happens, it sometimes
   does nothing with the content of the current line, only outputs the
   complete message. Still, when the loop restarts, it will no longer
   be the nextHandler (and that is "something").

A handler must provide:

1. A doit() method that returns true or false to indicate whether it
   actually did something with the current line or not.

2. A canDoit(position) method that returns true or false to indicate
   whether the handler can/should process the line given by "position"
   (we use this to identify wrapped lines, as explained later).

3. An init() method to do any necessary setup after the command line
   has been read (it is ok to do nothing). The easiest way to do this
   is to "inherit" from HandlerPrototype.

4. For handlers that deal with messages that can appear in the middle
   of a line, a lookahead(position) method to indicate whether there is
   a message that the handler can/should process in the line indicated
   by "position" when there is something else in the line before that
   message.

Besides true/false, canDoit() also returns a table with extra data. In
some cases, this table is empty; in others, the handler's doit() method
knows what to do with it. There are two places outside the handler
itself where this data is used:

1. In handleUnrecognizedMessage(), where we call lookahead() and use
   the value of "first" that should be embedded in this table.

2. In Lines:noHandlersForNextLine(), where we treat openParensHandler
   and openSquareBracketHandler specially and read file/page data from
   them.

A simple handler:
-----------------

  exampleHandler = {}
  exampleHandler.pattern = '^%s*L3 programming layer %b<>'

  function exampleHandler:init()
  end

  function exampleHandler:canDoit(position)
      if position == nil then position = 0 end
      local line = Lines:get(position)
      if line == nil then return false, {} end

      local first, last = string.find(line, self.pattern)
      if first == nil then return false, {} end
      return true, {first = first, last = last}
  end

  function exampleHandler:doit()
      local myTurn, data = self:canDoit()
      if not myTurn then return false end

      flushUnrecognizedMessages()
      local msg = Message:new()
      msg.severity = DEBUG
      msg.content = string.sub(Lines.current, 1, data.last)
      dispatch(msg)
      Lines:handledChars(data.last)
      return true
  end

  function exampleHandler:lookahead(position)
      local tmp = self.pattern
      self.pattern = string.sub(self.pattern, 2) -- remove leading '^'
      local result, data = self:canDoit(position)
      self.pattern = tmp
      -- Only return true if whatever matches
      -- is not at the beginning of the line
      if result and data.first == 1 then return false, {} end
      return result, data
  end

There are two special handlers, which we use as prototypes
(https://www.lua.org/pil/16.1.html ) and derive some other handlers
from:

 - stringsHandler - handles a list of predefined multiline strings
   that may or may not begin at the beginning of a line and may or may
   not end at the end of a line. We identify where the last line of
   the message ends and remove that from the input line, leaving the
   rest for the next handler. In general, the pattern we look for in
   each line should match the whole line and should not end with
   something like ".*", unless we are absolutely sure that (1) each line
   always ends at the end of the line and (2) the line is short enough
   that it is never wrapped (which also implies that the first line
   always starts at the start of the line). This handler is quite
   complex because it has to deal with many different scenarios. We
   derive other handlers from the basic prototype so that we can assign
   different severity levels to each one. Derived handlers differ from
   the prototype only by severity level and the set of patterns to
   search for.

 - genericLatexHandler -> handles the multiline messages generated by
   the \PackageInfo, \ClassWarning etc. LaTeX commands. The handler
   does not need to know in advance the text for all these messages;
   it looks for generic patterns instead and extracts from the message
   itself the name of the package and severity level. It is able to
   identify multiline messages by checking if the following lines are
   prefixed with a specific pattern, so it can also handle messages
   with an unknown number of lines. We derive other handlers from the
   basic prototype because, for each kind of message, we use a different
   set of pattern captures, and we need to treat these differently.
   Derived handlers differ from the prototype by the set of patterns
   to search for and by the unpackData() method, which deals with the
   specific pattern captures.


----------------
UNWRAPPING LINES
----------------

As mentioned, each handler has a canDoit(position) method, where
"position" is the line number in the Lines input buffer (0 is the
current line, 1 is the next line etc.). As expected, doit() calls
canDoit() to check whether it should proceed or not. However, that
is not all: if canDoit() fails to find a match, it checks whether the
line might be a wrapped line. To detect a wrapped line, canDoit()
uses Lines:seemsWrapped() to do three things:

1. Check that the line is the "right" size (in Lines:wrappingLength())

2. Check that the next line is not the beginning of a known message
   (in Lines:noHandlersForNextLine())

3. Check whether unwrapping the line actually gives us something, i.e.,
   the unwrapped line matches something that its two separate parts
   did not.

The problem is in step (2): this entails calling canDoit() from all
handlers on the following line, which means that it may be called many
times and may even call itself on a different line. Therefore, it is
essential that canDoit() have no side effects, i.e., it should not set
any state besides the return values. For the same reason, canDoit()
cannot alter the content of the Lines buffer when it tries to unwrap a
line; it should only use temporary variables instead (not only that:
if unwrapping does not yield a match, we do not want to do it either).

canDoit() may, however, return a "hint" about the line wrapping - either
the text or the pattern that finally matched. Some handlers do that and,
in doit(), take the string or pattern found by canDoit() and unwrap
lines until finding a match to that string or pattern. Others cannot do
this due to various reasons and need to repeat the work already done
by canDoit() (the code is different, however).

Note also that some handlers, such as underOverFullBoxHandler and
genericLatexHandler, cannot do (3), as they do not know in advance
what the end of a given message should look like. The bottom line is,
line unwrapping is done in many different ways according to context.

Finally, this all means that we may call canDoit() from all handlers on
a given line many times. This gets really bad with stringsHandler: for
a sequence of k consecutive lines that are max_print_line long, this
handler alone is O(n^{k+1}), where n is the number of patterns that it
checks for (around 40). A previous implementation proved this to be
impractical, so we work around this problem with memoization.


-----------------------------------------
DETAILS ABOUT UNDER/OVERFULL BOX MESSAGES
-----------------------------------------

These are actually several different messages:

Overfull \[hv]box (Npt too wide) SOMEWHERE
Underfull \[hv]box (badness N) SOMEWHERE

Possible SOMEWHEREs:

1. detected at line N
   -> this is something like a makebox (horizontal) or parbox (vertical)
      with an explicit size argument. For hboxes, this is followed by
      the offending text.

2. has occurred while \output is active
   -> If horizontal, this is probably in a header, footer or something
      similar; if vertical, the vertical glues got too stretched. For
      hboxes, this is followed by the offending text.

3. in alignment at lines LINE NUMBERS
   -> the problematic box is part of a tabular or math alignment
      environment. The lines correspond to the whole align structure,
      not only the problematic box. This should only appear as a
      horizontal problem. This is followed by the offending text, but
      it is more often than not just a bunch of "[]" to indicate nested
      boxes

4. in paragraph at lines LINE NUMBERS
   -> "Normal" text. This also only appears as a horizontal problem.
      This is followed by the offending text, which may include a
      few "[]" for whatsits, glues etc. In particular, the text often
      begins with "[]", indicating the left margin glue.

In the log file, all under/overfull box messages are followed by a
description of the boxes involved. This is *not* normally included in
the console output (but it may be, depending on \tracingonline). The
level of detail of this description is controlled by \showboxdepth
and \showboxbreadth. The default for these in LaTeX is -1, which means
this description is omitted and replaced by "[]", so it looks like this:

Underfull \vbox (badness 10000) detected at line 128
 []   <-- this is the description

If the message includes the offending text, the description comes
after it:

Underfull \hbox (badness 3417) in paragraph at lines 128--128
 []\T1/LibertinusSerif-TLF/b/n/14.4 (+20) Some document text...
 []   <-- this is the description

If there is no offending text, the description may appear in the same
line as the under/overfull box message (both are \message's). The
offending text, if any, always starts at the beginning of a line and
ends at the end of a line (but may be wrapped).

About the description: https://tex.stackexchange.com/a/367589

This all means that handling these messages from a pipe is different
than from the log file, because in the log file you know there will
be a "[]" after the message. What we do here is check whether that
string is there; if it is, we remove it.

under/overfull messages that do not include the offending text are
\message's and, therefore, there may be extra text (such as a shipout)
on the same line.

--]]


--[[ ##################################################################### ]]--
--[[ ################ INIT, MAIN LOOP, CHOOSING HANDLER ################## ]]--
--[[ ##################################################################### ]]--

   DEBUG, INFO, WARNING, CRITICAL, UNKNOWN = 0, 1, 2, 3, 4  -- severity levels

-- ANSI escape sequences used to colorize the output
RED, YELLOW, GREEN = '\x1B[31m', '\x1B[33m', '\x1B[32m'
BRIGHT, BGREEN = '\x1B[37;1m', '\x1B[32;1m'
RESET_COLOR = '\x1B[0m'

-- Program entry point: runs the setup steps, then feeds the log to the
-- handlers until moreData() reports that there is nothing left, and
-- finally flushes whatever is still pending.
--
-- arg: the command-line arguments, handed over to processCommandLine()
function main(arg)
  initializeKpse()
  processCommandLine(arg)
  initializeGlobals()
  registerHandlers()
  registerSummaries()
  convertFilterStringsToPatterns()
  detectEngine()

  while moreData() do
      if nextHandler == nil then
          chooseHandler()
      else
          -- A handler preset itself as the next one, so we bypass the
          -- choice step. nextHandler is cleared *before* calling doit()
          -- so that doit() is free to set it again. The temporary is
          -- local: it is only needed here and should not become a global.
          local handler = nextHandler
          nextHandler = nil
          handler:doit()
      end
  end

  -- dispatch remaining messages, if any
  if nextHandler then nextHandler:flush() end
  flushUnrecognizedMessages()
  dispatch(nil) -- end the output coroutine
end

-- Make sure there is data to be processed in Lines.current.
--
-- First tops up the lookahead buffer from the log file. Then, if the
-- current line still has unprocessed content, leaves it alone; otherwise
-- flushes the unrecognized messages and advances to the next line.
--
-- Returns true if there is a current line to process, false at the end
-- of the input.
function moreData()
  -- Refill the buffer. A simple experiment suggests 8 lines
  -- is enough, but why not use a higher value?
  while Lines:numLines() < 15 do
      -- This must be local: a bare assignment here would create (and
      -- keep overwriting) a global variable
      local line = logfile:read("*line")
      if line == nil then break end

      -- If we are running in a unix-like OS but the files we are
      -- processing were generated in Windows, lua may leave a \r
      -- character at the end of the line; if this happens, remove it
      if string.sub(line, -1) == '\r' then
          line = string.sub(line, 1, -2)
      end

      -- Do not skip blank lines here, we need to do it in Lines:append()
      Lines:append(line)
  end

  -- if there is remaining data from the previous iteration,
  -- we leave everything as-is for it to be processed now
  local current = Lines.current
  if current ~= nil and current ~= "" then return true end

  -- proceed to the next line
  flushUnrecognizedMessages()
  Lines:gotoNextLine()

  return Lines.current ~= nil
end

--[[
chooseHandler() never tries to process more than one message in a single
iteration for at least three reasons:

 * There may be no more data available on the current line, so we
   need to call moreData();

 * Maybe the next handler is one that we have already tried in this
   iteration; skipping it and trying others may fail;

 * Maybe the handler that last processed the data predefined the next
   handler, and we should not interfere with that.
--]]

-- Call doit() on each handler of the list, in order, stopping at the
-- first one that succeeds. Returns true if one of them did, false
-- if every handler declined.
local function anyHandlerSucceeds(handlers)
  for _, candidate in ipairs(handlers) do
      if candidate:doit() then return true end
  end

  return false
end

-- Offer the current input to the registered handlers; if none of them
-- takes it, treat it as an unrecognized message.
function chooseHandler()
  -- Some messages can only appear at the beginning of a line,
  -- so those handlers are only tried when we are there
  local handled = Lines.atBeginningOfLine
                  and anyHandlerSucceeds(beginningOfLineHandlers)

  -- Others may appear anywhere
  handled = handled or anyHandlerSucceeds(anywhereHandlers)

  -- No handler succeeded, which means this is an unrecognized message
  -- (or a fragment of one); Add to unrecognizedBuffer.
  if not handled then handleUnrecognizedMessage() end
end

-- Move the unrecognized text at the start of the current line to
-- unrecognizedBuffer and mark it as handled in Lines.
function handleUnrecognizedMessage()
  -- By default, the whole line is unrecognized. However, there may be
  -- another known message later on this same line; if so, only the text
  -- before the earliest such message goes to the unrecognizedBuffer.
  -- NOTE: check the comment before closeParensHandler:lookahead().
  local cut = #Lines.current

  for _, candidate in ipairs(anywhereHandlers) do
      local found, info = candidate:lookahead()
      if found then cut = math.min(cut, info.first - 1) end
  end

  local fragment = string.sub(Lines.current, 1, cut)
  unrecognizedBuffer = unrecognizedBuffer .. fragment
  Lines:handledChars(cut)
end

-- Send the content of unrecognizedBuffer (if there is anything in it
-- besides what trim() removes) to the output as a single message and
-- empty the buffer.
function flushUnrecognizedMessages()
  local pending = trim(unrecognizedBuffer)
  unrecognizedBuffer = pending

  if pending ~= "" then
      local msg = Message:new()
      msg.content = pending
      dispatch(msg)
      unrecognizedBuffer = ""
  end
end

-- Setup initial status (lots of globals, sue me)
--
-- Takes no arguments and returns nothing; it only (re)sets the global
-- variables below. main() calls this *after* initializeKpse(), so
-- whatever initializeKpse() decided must not be overwritten here.
function initializeGlobals()

  -- Chunks of text that were not recognized by any handler
  unrecognizedBuffer = ""

  -- The user may choose to silence some files. When one of these is
  -- opened/closed, this is set to true or false accordingly. The value
  -- is then used by Message:new()
  mute = false

  -- List of files that TeX had open at a given time during processing
  openFiles = Stack:new()

  -- "List" of currently active shipouts. There is only ever one shipout
  -- active at any time, but we borrow the design of openFiles because
  -- there may be "[" and "]" characters that do not correspond to any
  -- shipout, so we use this to keep track of them.
  shipouts = Stack:new()

  -- Counter, so we know the physical page number
  numShipouts = 0

  -- map physicalPage (from numShipouts) to latexPage (LaTeX counter)
  latexPages = {}

  -- After printing each message, the output coroutine stores them in
  -- currentPageMessages. When it receives a shipout message, it traverses
  -- currentPageMessages adding the page number it just learned about to
  -- each of the messages and clears currentPageMessages. This serves two
  -- purposes: it allows us to include the page numbers in the summaries
  -- and it allows us to include the page number in page-delay mode.
  currentPageMessages = {}

  -- The objects representing the summary for each kind of message are
  -- stored in summaries, so after all messages are processed we can just
  -- traverse this list calling :toString() and get all the summaries. The
  -- summaries are also used to suppress repeated messages. This table is
  -- populated by registerSummaries().
  summaries = {}

  -- All handlers should be in either of these. They are populated by
  -- registerHandlers().
  beginningOfLineHandlers = {}
  anywhereHandlers = {}

  -- Does the log file have wrapped lines?
  -- initializeKpse() decides this based on max_print_line and, as it
  -- runs before us, we must not clobber its verdict: we only fall back
  -- to the conservative default (assume there is line wrapping) if
  -- nothing has been decided yet.
  if badLogfile == nil then badLogfile = true end

  -- When we detect one of the many "please rerun LaTeX"
  -- messages, this is set to true (used in showSummary)
  SHOULD_RERUN_LATEX = false

  -- Have we reached the beginning of the epilogue yet?
  EPILOGUE = false

  -- If there were parse errors, we should say so in showSummary
  PARSE_ERROR = false

  -- Did we detect any error messages?
  ERRORS_DETECTED = false

  -- detectEngine() may set one of these to true
  LUATEX = false
  XETEX = false

  -- When we print a message that is the first from a given filename,
  -- we announce the filename first. This is used to detect the change
  -- in file - used by showFileBanner()
  lastFileBanner = ""

  -- Should we print "No important messages to show" at the end?
  nothingWasPrinted = true
end

-- Initialize the kpathsea library and find out whether the log file
-- is expected to contain wrapped lines.
--
-- Sets the globals max_print_line (a number, or nil if the kpathsea
-- variable is missing or not numeric) and badLogfile.
function initializeKpse()
  -- texlua does not populate the texconfig table by itself, so kpse
  -- has to be told which program we are before any variable can be
  -- queried. The program name apparently selects which texmf.cnf
  -- options get loaded (a name such as "texlogsieve" would allow
  -- custom options there); as we only search for files and read
  -- "max_print_line", we simply pose as luatex.
  kpse.set_program_name("luatex")

  max_print_line = tonumber(kpse.var_value("max_print_line"))

  -- Lines are considered unwrapped only when the limit is above 9999
  local unwrapped = max_print_line ~= nil and max_print_line > 9999
  badLogfile = not unwrapped
end

-- Populate the global lists beginningOfLineHandlers and anywhereHandlers
-- and call :init() on every handler found in them. The order of the
-- entries below is the order in which they are appended to the lists.
function registerHandlers()
  local lineStart = {
      pseudoErrorHandler,
      errorHandler,
      citationHandler,
      referenceHandler,
      labelHandler,
      unusedLabelHandler,
      genericLatexHandler,
      latex23MessageHandler,
      genericLatexVariantIHandler,
      genericLatexVariantIIHandler,
      providesHandler,
      geometryDetailsHandler,
      epilogueHandler,
      underOverFullBoxHandler,
      utf8FontMapHandler,
      missingCharHandler,
      beginningOfLineDebugStringsHandler,
      beginningOfLineInfoStringsHandler,
      beginningOfLineWarningStringsHandler,
      beginningOfLineCriticalStringsHandler,
  }

  local anyPosition = {
      anywhereDebugStringsHandler,
      anywhereInfoStringsHandler,
      anywhereWarningStringsHandler,
      anywhereCriticalStringsHandler,
      fpHandler, -- before open/closeParensHandler!
      openParensHandler,
      closeParensHandler,
      openSquareBracketHandler,
      closeSquareBracketHandler,
      extraFilesHandler,
  }

  for _, handler in ipairs(lineStart) do
      beginningOfLineHandlers[#beginningOfLineHandlers + 1] = handler
  end

  for _, handler in ipairs(anyPosition) do
      anywhereHandlers[#anywhereHandlers + 1] = handler
  end

  -- Initialize everything, beginning-of-line handlers first
  for _, list in ipairs({beginningOfLineHandlers, anywhereHandlers}) do
      for _, handler in ipairs(list) do
          handler:init()
      end
  end
end

-- Append every summary object to the global list "summaries",
-- in the order given below.
function registerSummaries()
  local ordered = {
      underOverSummary,
      missingCharSummary,
      repetitionsSummary,
      unusedLabelsSummary,
      citationsSummary,
      referencesSummary,
      labelsSummary,
  }

  for _, summary in ipairs(ordered) do
      summaries[#summaries + 1] = summary
  end
end

-- Replace each user-supplied filter list with a new list holding the
-- corresponding Lua patterns: file lists go through globtopattern(),
-- all the other lists go through stringToPattern().
function convertFilterStringsToPatterns()
  -- Return a new list with convert() applied to each item of "items"
  local function mapped(items, convert)
      local result = {}
      for _, item in ipairs(items) do
          result[#result + 1] = convert(item)
      end
      return result
  end

  -- file names: globs
  SEMISILENCE_FILES = mapped(SEMISILENCE_FILES, globtopattern)
  SILENCE_FILES_RECURSIVE = mapped(SILENCE_FILES_RECURSIVE, globtopattern)

  -- everything else: plain strings
  SILENCE_STRINGS = mapped(SILENCE_STRINGS, stringToPattern)
  SILENCE_PKGS = mapped(SILENCE_PKGS, stringToPattern)

  FORCED_DEBUG = mapped(FORCED_DEBUG, stringToPattern)
  FORCED_INFO = mapped(FORCED_INFO, stringToPattern)
  FORCED_WARNING = mapped(FORCED_WARNING, stringToPattern)
  FORCED_CRITICAL = mapped(FORCED_CRITICAL, stringToPattern)
end

-- Read the first line of the log file and use it to tell whether the
-- engine is LuaTeX or XeTeX, setting LUATEX or XETEX accordingly. Does
-- nothing if the log file is empty.
function detectEngine()
  local firstLine = logfile:read("*line")
  if firstLine == nil then return end

  local banner = string.lower(firstLine)
  if string.find(banner, '^this is lua') then
      LUATEX = true
  elseif string.find(banner, '^this is xe') then
      XETEX = true
  end

  -- the line was consumed here, so hand it
  -- over to the normal processing machinery
  Lines:append(firstLine)
end

-- Usage text; processCommandLine() prints it line by
-- line when "--help" or "-h" is given and then exits.
helpmsg = [[
Usage: texlogsieve [OPTION]... [INPUT FILE]
texlogsieve reads a LaTeX log file (or the standard input), filters
out less relevant messages, and displays a summary report.

texlogsieve reads additional options from the texlogsieverc file
if it exists anywhere in the TeX path (for example, in the current
directory).

Options:
  --page-delay, --no-page-delay          enable/disable grouping
                                         messages by page before display
  --summary, --no-summary                enable/disable final summary
  --only-summary                         no filtering, only final summary
  --shipouts, --no-shipouts              enable/disable reporting shipouts
  --file-banner, --no-file-banner        Show/suppress "From file ..." banners
  --repetitions, --no-repetitions        allow/prevent repeated messages
  --be-redundant, --no-be-redundant      present/suppress ordinary messages
                                         that will also appear in the summary
  --box-detail, --no-box-detail          include/exclude full under/overfull
                                         boxes information in the summary
  --ref-detail, --no-ref-detail          include/exclude full undefined refs
                                         information in the summary
  --cite-detail, --no-cite-detail        include/exclude full undefined
                                         citations information in the summary
  --summary-detail, --no-summary-detail  toggle box-detail, ref-detail, and
                                         cite-detail at once
  --heartbeat, --no-heartbeat            enable/disable progress gauge
  --color, --no-color                    enable/disable colored output
  --tips, --no-tips                      enable/disable suggesting fixes
  -l LEVEL, --minlevel=LEVEL             filter out messages with severity
                                         level lower than [LEVEL]. Valid
                                         levels are DEBUG, INFO, WARNING,
                                         CRITICAL, and UNKNOWN
  -u, --unwrap-only                      no filtering or summary, only
                                         unwrap long, wrapped lines
  --silence-package=PKGNAME              suppress messages from package
                                         PKGNAME; can be used multiple times
  --silence-string=EXCERPT               suppress messages containing text
                                         EXCERPT; can be used multiple times
  --silence-file=FILENAME                suppress messages generated during
                                         processing of FILENAME; can be used
                                         multiple times
  --semisilence-file=FILENAME            similar to --silence-file, but not
                                         recursive
  --add-debug-message=MESSAGE            add new recognizable DEBUG message
  --add-info-message=MESSAGE             add new recognizable INFO message
  --add-warning-message=MESSAGE          add new recognizable WARNING message
  --add-critical-message=MESSAGE         add new recognizable CRITICAL message
  --set-to-level-debug=EXCERPT           reset severity of messages containing
                                         text EXCERPT to DEBUG; can be used
                                         multiple times
  --set-to-level-info=EXCERPT            reset severity of messages containing
                                         text EXCERPT to INFO; can be used
                                         multiple times
  --set-to-level-warning=EXCERPT         reset severity of messages containing
                                         text EXCERPT to WARNING; can be used
                                         multiple times
  --set-to-level-critical=EXCERPT        reset severity of messages containing
                                         text EXCERPT to CRITICAL; can be used
                                         multiple times
  -c cfgfile, --config-file=cfgfile      read options from given config file
                                         in addition to default config files
  -v, --verbose                          Display info on texlogsieve config
  -h, --help                             give this help list
  --version                              print program version]]

-- Version and license text; processCommandLine() prints
-- it line by line when "--version" is given and then exits.
versionmsg = [[
texlogsieve 1.6.0
Copyright (C) 2021-2025 Nelson Lago <lago@ime.usp.br>
License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.]]

-- Set every option global to its default value, parse the command
-- line with simpleGetopt(), merge in the options found in the config
-- files and, finally, apply the resulting options to the globals.
--
-- args: the command line arguments, handed over to simpleGetopt().
--
-- Side effects, besides setting the option globals:
--   * prints the help/version text and exits with status 0 if asked to;
--   * opens the log file and stores the handle in the global "logfile"
--     (io.stdin if no file name was given) and calls readFls() with
--     the name of the file that was actually opened;
--   * adds user-defined messages to the anywhere*StringsHandler patterns;
--   * prints an error message and exits with status 1 if the severity
--     level is invalid or if there are unknown options.
--
-- Each option is removed from "vars" after being processed, so whatever
-- is left at the end is an unknown option. Several options change the
-- defaults of other options, so the order of the blocks below matters.
function processCommandLine(args)
  HEARTBEAT = true
  PAGE_DELAY = true
  ONLY_SUMMARY = false
  SHOW_SUMMARY = true
  SHOW_SHIPOUTS = false
  RAW = false
  SILENCE_REPETITIONS = true
  MINLEVEL = WARNING
  BE_REDUNDANT = false
  FILE_BANNER = true
  DETAILED_UNDEROVER_SUMMARY = true
  DETAILED_REFERENCE_SUMMARY = true
  DETAILED_CITATION_SUMMARY = true
  TIPS = true

  COLOR = false

  SILENCE_STRINGS = {}
  SILENCE_PKGS = {} -- just the package names
  SEMISILENCE_FILES = {} -- filenames (without leading path), file globs work
  SILENCE_FILES_RECURSIVE = {} -- same

  -- The user may redefine the severity level of some messages.
  FORCED_DEBUG = {}
  FORCED_INFO = {}
  FORCED_WARNING = {}
  FORCED_CRITICAL = {}

  -- "-l level -c configFile"
  local optionsWithArgs = "lc"
  local vars = simpleGetopt(args, optionsWithArgs)

  --help
  -- "-h"
  if vars.help or vars.h then
      for _, line in ipairs(linesToTable(helpmsg)) do print(line) end
      os.exit(0)
  end

  --version
  if vars.version then
      for _, line in ipairs(linesToTable(versionmsg)) do print(line) end
      os.exit(0)
  end

  --config-file=file
  -- "-c file"
  local configFileNames = vars['config-file']
  if configFileNames == nil then configFileNames = {} end

  if vars.c ~= nil then
      for _, val in ipairs(vars.c) do
          table.insert(configFileNames, val)
      end
  end

  -- Add the default config file to the beginning of the list
  local filename = kpse.find_file('texlogsieverc')
  if filename ~= nil then table.insert(configFileNames, 1, filename) end
  filename = nil

  -- The per-user config file goes before everything else
  if os.type == "unix" then
      local dirname = os.getenv("HOME") -- should never be nil, but...
      if dirname then
          filename = kpse.find_file(dirname .. "/.texlogsieverc")
      end
  else
      -- https://docs.microsoft.com/en-us/windows/deployment/usmt/usmt-recognized-environment-variables
      -- %LOCALAPPDATA% corresponds to C:\Users\<username>\AppData\Local .
      -- %APPDATA% is "C:\Users\<username>\AppData\Roaming" or
      -- "C:\Documents and Settings\<username>\Application Data"
      for _, envdir in ipairs({"LOCALAPPDATA", "APPDATA"}) do
          local dirname = os.getenv(envdir)
          if dirname then
              filename = kpse.find_file(dirname .. "/texlogsieverc")
              if filename then break end
          end
      end
  end

  if filename ~= nil then table.insert(configFileNames, 1, filename) end
  filename = nil

  for _, configFileName in ipairs(configFileNames) do
      -- io.open() returns nil plus an error message on
      -- failure, so assert() reports why the file is unusable
      local configFile = assert(io.open(configFileName, "r"))
      vars = processConfigFile(configFile, vars)
      -- we opened it, so we close it
      configFile:close()
  end

  vars.c = nil
  vars['config-file'] = nil


  --unwrap-only
  -- "-u"
  if vars['unwrap-only'] or vars.u then
      -- these may be overridden below, so order matters
      RAW = true
      SHOW_SUMMARY = false
      PAGE_DELAY = false
      SHOW_SHIPOUTS = true
      SILENCE_REPETITIONS = false
      BE_REDUNDANT = true
      MINLEVEL = DEBUG
      FILE_BANNER = false
  end

  vars.u = nil
  vars['unwrap-only'] = nil


  --page-delay
  --no-page-delay
  --page-delay=true/false
  if vars['no-page-delay']
          or vars['page-delay'] ~= nil and not vars['page-delay'] then

      PAGE_DELAY = false
      SHOW_SHIPOUTS = true -- this may be overridden below
  end
  if vars['page-delay'] then PAGE_DELAY = true end

  vars['page-delay'] = nil
  vars['no-page-delay'] = nil


  --only-summary
  if vars['only-summary'] then ONLY_SUMMARY = true end

  --no-summary
  --summary
  --summary=true/false
  if vars['no-summary'] or vars.summary ~= nil and not vars.summary then
      -- without a summary, nothing should be left out of the main report
      SHOW_SUMMARY = false
      SILENCE_REPETITIONS = false
      BE_REDUNDANT = true
  end
  if vars.summary then SHOW_SUMMARY = true end

  vars['only-summary'] = nil
  vars.summary = nil
  vars['no-summary'] = nil


  --no-shipouts
  --shipouts
  --shipouts=true/false
  if vars['no-shipouts'] or vars.shipouts ~= nil and not vars.shipouts then
      SHOW_SHIPOUTS = false
  end
  if vars.shipouts then SHOW_SHIPOUTS = true end

  vars.shipouts = nil
  vars['no-shipouts'] = nil


  --minlevel
  -- "-l"
  local level
  if vars.minlevel ~= nil then level = vars.minlevel end
  if vars.l ~= nil then level = vars.l end

  if level ~= nil then
      -- processConfigFile() appends the values that come from the
      -- command line after the ones that come from the config files,
      -- so the last value is the one with the highest precedence
      -- (this also means the last of repeated options wins).
      level = string.lower(level[#level])
      if     level == "debug"    then MINLEVEL = DEBUG
      elseif level == "info"     then MINLEVEL = INFO
      elseif level == "warning"  then MINLEVEL = WARNING
      elseif level == "critical" then MINLEVEL = CRITICAL
      elseif level == "unknown"  then MINLEVEL = UNKNOWN
      else
          print('    texlogsieve: unknown level "' .. level .. '"')
          print('                 for help, try "texlogsieve --help"')
          print()
          os.exit(1)
      end
  end

  -- When severity is debug, we already output open/close
  -- file messages; adding these would be confusing.
  if MINLEVEL == DEBUG then FILE_BANNER = false end

  vars.l = nil
  vars.minlevel = nil


  --no-file-banner
  --file-banner
  --file-banner=true/false
  if vars['no-file-banner'] or vars['file-banner'] ~= nil
                        and not vars['file-banner'] then

      FILE_BANNER = false
  end
  if vars['file-banner'] then FILE_BANNER = true end

  vars['file-banner'] = nil
  vars['no-file-banner'] = nil


  --no-repetitions
  --repetitions
  --repetitions=true/false
  -- (note the inversion: "repetitions" turns the silencing off)
  if vars['no-repetitions']
                    or vars.repetitions ~= nil and not vars.repetitions then

      SILENCE_REPETITIONS = true
  end
  if vars.repetitions then SILENCE_REPETITIONS = false end

  vars.repetitions = nil
  vars['no-repetitions'] = nil


  --be-redundant
  --no-be-redundant
  --be-redundant=true/false
  if vars['no-be-redundant']
                    or vars['be-redundant'] ~= nil
                    and not vars['be-redundant'] then

      BE_REDUNDANT = false
  end
  if vars['be-redundant'] then BE_REDUNDANT = true end

  vars['be-redundant'] = nil
  vars['no-be-redundant'] = nil


  --summary-detail
  --no-summary-detail
  --summary-detail=true/false
  -- (processed before the three individual "-detail"
  -- options so that those may override it)
  if vars['no-summary-detail']
                    or vars['summary-detail'] ~= nil
                    and not vars['summary-detail'] then

      DETAILED_UNDEROVER_SUMMARY = false
      DETAILED_REFERENCE_SUMMARY = false
      DETAILED_CITATION_SUMMARY = false
  end
  if vars['summary-detail'] then
      DETAILED_UNDEROVER_SUMMARY = true
      DETAILED_REFERENCE_SUMMARY = true
      DETAILED_CITATION_SUMMARY = true
  end

  vars['summary-detail'] = nil
  vars['no-summary-detail'] = nil


  --box-detail
  --no-box-detail
  --box-detail=true/false
  if vars['no-box-detail']
                    or vars['box-detail'] ~= nil
                    and not vars['box-detail'] then

      DETAILED_UNDEROVER_SUMMARY = false
  end
  if vars['box-detail'] then DETAILED_UNDEROVER_SUMMARY = true end

  vars['box-detail'] = nil
  vars['no-box-detail'] = nil


  --ref-detail
  --no-ref-detail
  --ref-detail=true/false
  if vars['no-ref-detail']
                    or vars['ref-detail'] ~= nil
                    and not vars['ref-detail'] then

      DETAILED_REFERENCE_SUMMARY = false
  end
  if vars['ref-detail'] then DETAILED_REFERENCE_SUMMARY = true end

  vars['ref-detail'] = nil
  vars['no-ref-detail'] = nil


  --cite-detail
  --no-cite-detail
  --cite-detail=true/false
  if vars['no-cite-detail']
                    or vars['cite-detail'] ~= nil
                    and not vars['cite-detail'] then

      DETAILED_CITATION_SUMMARY = false
  end
  if vars['cite-detail'] then DETAILED_CITATION_SUMMARY = true end

  vars['cite-detail'] = nil
  vars['no-cite-detail'] = nil


  --no-heartbeat
  --heartbeat
  --heartbeat=true/false
  if vars['no-heartbeat'] or vars.heartbeat ~= nil and not vars.heartbeat then
      HEARTBEAT = false
  end
  if vars.heartbeat then HEARTBEAT = true end

  vars.heartbeat = nil
  vars['no-heartbeat'] = nil


  --no-color
  --color
  --color=true/false
  if vars['no-color'] or vars.color ~= nil and not vars.color then
      COLOR = false
  end
  if vars.color then COLOR = true end

  vars.color = nil
  vars['no-color'] = nil


  --no-tips
  --tips
  --tips=true/false
  if vars['no-tips'] or vars.tips ~= nil and not vars.tips then
      TIPS = false
  end
  if vars.tips then TIPS = true end

  vars.tips = nil
  vars['no-tips'] = nil


  if vars.filename == nil then
      logfile = io.stdin
  else
      local filename
      -- try the name as given, then with ".log" and "log" appended
      local exts = {"", ".log", "log"}
      for _, ext in ipairs(exts) do
          filename = vars.filename .. ext
          logfile = io.open(filename, "r")
          if logfile ~= nil then break end
      end
      assert(logfile ~= nil,
             'texlogsieve: cannot open log file "' .. vars.filename .. '"')
      readFls(filename)
  end

  vars.filename = nil


  if vars['silence-string'] then SILENCE_STRINGS = vars['silence-string'] end

  if vars['silence-package'] then SILENCE_PKGS = vars['silence-package'] end

  if vars['silence-file'] then SILENCE_FILES_RECURSIVE =
                                    vars['silence-file'] end

  if vars['semisilence-file'] then SEMISILENCE_FILES =
                                    vars['semisilence-file'] end

  vars['silence-string'] = nil
  vars['silence-package'] = nil
  vars['silence-file'] = nil
  vars['semisilence-file'] = nil


  -- User-defined messages: each one is converted to a pattern, anchored
  -- to the beginning of the text unless it already contains a "^", and
  -- each "\n" (backslash + n) typed by the user becomes optional
  -- whitespace followed by a real line break.
  if vars['add-debug-message'] then
      for _, msg in ipairs(vars['add-debug-message']) do
          local pat = stringToPattern(msg)
          if not string.find(pat, '^', 1, true) then pat = '^%s*' .. pat end
          pat = string.gsub(pat, '\\n', '%%s*\n')
          table.insert(anywhereDebugStringsHandler.patterns, pat)
      end
  end

  if vars['add-info-message'] then
      for _, msg in ipairs(vars['add-info-message']) do
          local pat = stringToPattern(msg)
          if not string.find(pat, '^', 1, true) then pat = '^%s*' .. pat end
          pat = string.gsub(pat, '\\n', '%%s*\n')
          table.insert(anywhereInfoStringsHandler.patterns, pat)
      end
  end

  if vars['add-warning-message'] then
      for _, msg in ipairs(vars['add-warning-message']) do
          local pat = stringToPattern(msg)
          if not string.find(pat, '^', 1, true) then pat = '^%s*' .. pat end
          pat = string.gsub(pat, '\\n', '%%s*\n')
          table.insert(anywhereWarningStringsHandler.patterns, pat)
      end
  end

  if vars['add-critical-message'] then
      for _, msg in ipairs(vars['add-critical-message']) do
          local pat = stringToPattern(msg)
          if not string.find(pat, '^', 1, true) then pat = '^%s*' .. pat end
          pat = string.gsub(pat, '\\n', '%%s*\n')
          table.insert(anywhereCriticalStringsHandler.patterns, pat)
      end
  end

  vars['add-debug-message'] = nil
  vars['add-info-message'] = nil
  vars['add-warning-message'] = nil
  vars['add-critical-message'] = nil


  if vars['set-to-level-debug'] then
      FORCED_DEBUG = vars['set-to-level-debug']
  end

  if vars['set-to-level-info'] then
      FORCED_INFO = vars['set-to-level-info']
  end

  if vars['set-to-level-warning'] then
      FORCED_WARNING = vars['set-to-level-warning']
  end

  if vars['set-to-level-critical'] then
      FORCED_CRITICAL = vars['set-to-level-critical']
  end

  vars['set-to-level-debug'] = nil
  vars['set-to-level-info'] = nil
  vars['set-to-level-warning'] = nil
  vars['set-to-level-critical'] = nil


  --verbose
  -- "-v"
  if vars['verbose'] or vars.v then
      if #configFileNames == 0 then
          print("texlogsieve: no config files, using defaults")
      else
          print("texlogsieve: using config files: "
                .. table.concat(configFileNames, ", "))
      end

      local msg = "texlogsieve: minlevel is "
      if     MINLEVEL == DEBUG    then msg = msg .. 'DEBUG'
      elseif MINLEVEL == INFO     then msg = msg .. 'INFO'
      elseif MINLEVEL == WARNING  then msg = msg .. 'WARNING'
      elseif MINLEVEL == CRITICAL then msg = msg .. 'CRITICAL'
      else                             msg = msg .. 'UNKNOWN'
      end

      print(msg)

      if RAW then
          print("texlogsieve: using raw (unwrap-only) mode")
      end
  end

  vars['verbose'] = nil
  vars.v = nil


  -- Every known option was removed from vars
  -- above; anything still there is an error
  local unknown_options = false
  for k in pairs(vars) do
      print('    texlogsieve: unknown option "' .. k .. '"')
      unknown_options = true
  end

  if unknown_options then
      print('                 for help, try "texlogsieve --help"')
      print()
      os.exit(1)
  end
end

-- Read the options in a config file and merge them with the options
-- that are already known.
--
-- configFile:  an open file handle; it is read up to the end but it
--              is NOT closed here (that is up to whoever opened it).
-- currentVars: table with the options collected so far (command line
--              and previously read config files).
--
-- Returns a new table with the merged options. Blank lines and lines
-- starting with "#" are ignored; "name = value" lines store a string
-- value and lines with only "name" store true, both by means of
-- simpleGetoptStoreVal().
function processConfigFile(configFile, currentVars)
  local fileVars = {}

  while true do
      local line = configFile:read("*line")
      if line == nil then break end

      line = trim(line)
      if line ~= "" and not string.find(line, '^#') then
          local optname, optval
          local equals = string.find(line, '=', 1, true)
          if equals ~= nil then
              optname = trim(string.sub(line, 1, equals - 1))
              optval = trim(string.sub(line, equals + 1))
          else
              optname = line
              optval = true
          end
          simpleGetoptStoreVal(fileVars, optname, optval)
      end
  end

  -- merge fileVars with currentVars; currentVars has precedence
  for k, v in pairs(currentVars) do
      if type(v) == "table" and type(fileVars[k]) == "table" then
          -- both are lists: append, so that the values from
          -- currentVars come after the ones from the file
          for _, item in ipairs(v) do
              table.insert(fileVars[k], item)
          end
      else
          -- the option is not in the file or at least one of the
          -- values is not a list (boolean etc.): currentVars wins
          fileVars[k] = v
      end
  end

  -- return the merged values
  return fileVars
end


--[[ ##################################################################### ]]--
--[[ ################# OUTPUT COROUTINE AND FORMATTING ################### ]]--
--[[ ##################################################################### ]]--

-- The output coroutine: handles one message per resume, until it is
-- resumed with nil, when it wraps things up and finishes.
outputCoroutine = coroutine.create(function(msg)
    while true do
        if msg == nil then break end
        processMessage(msg)
        -- wait for the next message
        msg = coroutine.yield()
    end

    finishProcessingMessages()
end)

-- Hand a message over to the output coroutine (nil means "no more
-- messages"). If the coroutine raises an error, the error is raised
-- again here, with the traceback of the coroutine, instead of being
-- silently discarded by coroutine.resume(). If the coroutine cannot
-- be resumed (for example, because it has already finished), the
-- message is dropped.
dispatch = function(msg)
  if coroutine.status(outputCoroutine) ~= "suspended" then return end

  local ok, err = coroutine.resume(outputCoroutine, msg)
  if not ok then
      error(debug.traceback(outputCoroutine, tostring(err)), 0)
  end
end

-- Built-in severity overrides, used by adjustSeverity(). The entries
-- are Lua patterns. The tables are created only once, when the file is
-- loaded; the names are still global, as before.
DEFAULT_FORCED_DEBUG = {
  -- This is a harmless message caused by a bug in the
  -- biblatex-abnt package that can safely be ignored
  "File 'brazilian%-abnt%-abnt%.lbx' not found!",
}

DEFAULT_FORCED_INFO = {
  "File %b`' already exists on the system%."
          .. "%s*Not generating it from",
  "You have requested package %b`',"
          .. "%s*but the package provides",
  "Writing file %b`'",
  "Form Feed has been converted to Blank",
  "Tab has been converted to Blank",
  "The morewrites package is unnecessary",
  'Unused \\captionsetup%b[]',
  "Unknown feature `' in font %b`'", -- empty feature, not a problem
  "Package refcheck Warning: Unused label %b`'", -- we process these specially
  "Token not allowed in a PDF string %(Unicode%):",
  "Font shape `[^']*/m/[^']*' in size %b<> not available.*"
          .. "Font shape `[^']*/regular/[^']*' tried instead",
  "Font shape `[^']*/b/[^']*' in size %b<> not available.*"
          .. "Font shape `[^']*/bold/[^']*' tried instead",
  -- versions of xcolor after 2022
  "Package option `hyperref' is obsolete and ignored",
  "Package lipsum Warning: Unknown language 'latin'%.",
}

DEFAULT_FORCED_WARNING = {}

DEFAULT_FORCED_CRITICAL = {
  "Label %b`' multiply defined",
  "Command .- invalid in math mode",
  "Optional argument of \\twocolumn too tall on page",
  "Marginpar on page %S- moved",
  "Some font shapes were not available, defaults substituted%.",
  "Font shape %b`' in size %b<> not available"
          .. "%s+Font shape %b`' tried instead",
  "Font shape %b`' in size %S+ not available"
          .. "%s+external font %b`' used",
  "Font shape %b`' undefined"
          .. "%s+using %b`' instead",
}

-- If msg does not have the given severity yet and either its formatted
-- text or its raw content matches one of the patterns, change the
-- severity of msg. Returns true if the severity was changed.
local function forceSeverity(msg, formatted, patterns, severity)
  if msg.severity == severity then return false end

  for _, pat in ipairs(patterns) do
      if string.find(formatted, pat) or string.find(msg.content, pat) then
          msg.severity = severity
          return true
      end
  end

  return false
end

-- Change the severity of a message if it matches one of the built-in
-- (DEFAULT_FORCED_*) or user-defined (FORCED_*) patterns. Messages
-- whose text, according to msg:realToString(), is empty are left alone.
--
-- msg: the message object; msg.severity may be modified.
function adjustSeverity(msg)
  local formatted = trim(msg:realToString())
  if formatted == "" then return end

  -- The defaults are all applied in sequence, without stopping at the
  -- first match, and only then we check the user-defined strings; this
  -- way, the user-defined strings override the defaults.
  forceSeverity(msg, formatted, DEFAULT_FORCED_DEBUG, DEBUG)
  forceSeverity(msg, formatted, DEFAULT_FORCED_INFO, INFO)
  forceSeverity(msg, formatted, DEFAULT_FORCED_WARNING, WARNING)
  forceSeverity(msg, formatted, DEFAULT_FORCED_CRITICAL, CRITICAL)

  -- For the user-defined strings, the first match wins
  if forceSeverity(msg, formatted, FORCED_DEBUG, DEBUG) then return end
  if forceSeverity(msg, formatted, FORCED_INFO, INFO) then return end
  if forceSeverity(msg, formatted, FORCED_WARNING, WARNING) then return end
  forceSeverity(msg, formatted, FORCED_CRITICAL, CRITICAL)
end


-- Tips for some known messages, used by addTip(). The keys are Lua
-- patterns that identify the message; the values are the tips. The
-- table is created only once, when the file is loaded.
local TIPS_BY_PATTERN = {
    ["multiple pdfs with page group included"] =
                 '** The message above is usually harmless\n'
              .. '** (check https://tex.stackexchange.com/questions/76273 );\n'
              .. '** processing your included PDF files (images etc.) with\n'
              .. '** "gs -dNOPAUSE -dBATCH -dSAFER -sDEVICE=pdfwrite '
              .. '-dCompatibilityLevel=1.5 -sOutputFile=out.pdf in.pdf"\n'
              .. '** usually eliminates this warning with no ill effects.\n'
              .. '** If that does not work, try 1.3 instead of 1.5 (note\n'
              .. '** that 1.3 may affect output quality and file size).',
    ["found PDF version %b<>, but at most version %b<> allowed"] =
                 "** The message above is usually harmless;\n"
              .. "** you may want to add \\pdfminorversion=7 (pdflatex) or\n"
              .. "** \\pdfvariable minorversion 7 (lualatex) before\n"
              .. "** \\documentclass in your document.",
    ["I've expected \\MT_cramped_internal:Nn to have\nthe meaning"] =
                 "** The message above indicates a bug that was fixed\n"
              .. "** in version 1.26 of package mathtools together with\n"
              .. "** version 1.10 of package lualatex-math (both from\n"
              .. "** March/2021).",
    ["Unable to apply patch `footnote' on input line"] =
                 "** The message above indicates a bug in microtype\n"
              .. "** fixed in version 3.0e (from June/2022)."
}

-- If the message is one of those we have a tip for, append the tip to
-- msg.content. Messages whose text, according to msg:realToString(),
-- is empty are left alone.
--
-- msg: the message object; msg.content may be modified.
function addTip(msg)
  local formatted = trim(msg:realToString())
  if formatted == "" then return end

  for msgtext, tip in pairs(TIPS_BY_PATTERN) do
      if string.find(formatted, msgtext) or string.find(msg.content, msgtext) then
          msg.content = msg.content .. "\n** texlogsieve tip:\n" .. tip
      end
  end
end


-- Process one message in the output coroutine: adjust its severity,
-- maybe add a tip to it, either show it right away or keep it until
-- the next shipout and, if the message is a shipout, deal with all
-- the messages collected for the page that has just ended.
function processMessage(msg)
  -- once set, this stays set (and we do not check the message)
  if not SHOULD_RERUN_LATEX then
      SHOULD_RERUN_LATEX = msg:checkMatch(msg.rerunMessages)
  end

  adjustSeverity(msg)

  if TIPS then addTip(msg) end

  local deferred = ONLY_SUMMARY or PAGE_DELAY
  if deferred then
      heartbeat:tick()
  else
      showMessage(msg)
  end

  -- aggregate until shipout
  currentPageMessages[#currentPageMessages + 1] = msg

  if not msg.shipout then return end

  heartbeat:stop()

  -- now we know the page number of all pending messages
  for _, pending in ipairs(currentPageMessages) do
      pending.physicalPage = msg.physicalPage
      -- normally, toSummary() is called by showMessage(),
      -- but with ONLY_SUMMARY that is never called
      if ONLY_SUMMARY then pending:toSummary() end
  end

  if PAGE_DELAY and not ONLY_SUMMARY then
      showPageMessages()
  end

  currentPageMessages = {}
end

-- Called when there are no more messages: show what is still
-- pending, then the summary and, if nothing at all was printed,
-- say so.
function finishProcessingMessages()
  heartbeat:stop()

  -- messages after the last shipout
  local pageDelayed = PAGE_DELAY and not ONLY_SUMMARY
  if pageDelayed then showRemainingMessages() end

  if SHOW_SUMMARY then showSummary() end

  if not nothingWasPrinted then return end
  print("No important messages to show")
end

-- Print a "From file ..." banner if the message comes from a file
-- that is not the one named in the previous banner. Also records,
-- in PARSE_ERROR, that a message from the file "DUMMY" was seen
-- (this happens even if banners are disabled).
function showFileBanner(msg)
  local name = msg.filename

  if name == "DUMMY" then PARSE_ERROR = true end

  if not FILE_BANNER then return end

  -- no file, or same file as in the last banner
  if name == nil or name == "" or name == lastFileBanner then return end

  lastFileBanner = name

  local banner = "From file " .. name .. ":"
  if COLOR then
      banner = yellow(banner)
  else
      -- without colors, a ruler makes the banner stand out
      print(string.rep('-', #banner))
  end

  print(banner)
end

-- Prints a message (unless it is filtered out or is a repetition) and
-- registers it in the summaries.
--
-- msg: the message to show
-- bypassMostFilters: passed on to msg:toString(); used when a previously
--                    suppressed message is shown only to give context
--
-- Side effects: sets msg.suppressed, may update lastMessage,
-- lastFileBanner (via showFileBanner) and nothingWasPrinted.
function showMessage(msg, bypassMostFilters)
  -- assume the message is suppressed until we know there is text to show
  msg.suppressed = true
  local formatted = msg:toString(bypassMostFilters)

  if trim(formatted) ~= "" then
      msg.suppressed = false
      local pageinfo = ""
      local spaces = ""
      if not RAW and msg.physicalPage ~= nil then
          pageinfo = 'pg ' .. msg.physicalPage .. ': '
          spaces = string.rep(" ", string.len(pageinfo)) -- before color
          if COLOR then pageinfo = bright(pageinfo) end
      end

      -- A message is a repetition if it has
      -- already been included in some summary
      local alreadySeen = false
      if SILENCE_REPETITIONS then
          for _, summary in ipairs(summaries) do
              if summary:alreadySeen(msg) then
                  alreadySeen = true
                  msg.suppressed = true
                  break
              end
          end
      end

      if not SILENCE_REPETITIONS or not alreadySeen then

          -- For unknown messages, we show the previous one
          -- if it was suppressed to give some context, unless
          -- that message is certainly unrelated
          if msg.severity == UNKNOWN
                  and lastMessage ~= nil
                  and msg ~= lastMessage
                  and lastMessage.suppressed
                  and not lastMessage.alwaysEnds
          then
              -- the context message must be shown even if it is a
              -- repetition, so the check is disabled during the
              -- recursive call and restored right after it
              local TMP = SILENCE_REPETITIONS
              SILENCE_REPETITIONS = false
              showMessage(lastMessage, true)
              SILENCE_REPETITIONS = TMP
          end

          showFileBanner(msg)

          for _, line in ipairs(linesToTable(formatted)) do
              if COLOR and msg.severity >= CRITICAL then
                  line = red(line)
              end

              print(pageinfo .. line)

              -- only the first line carries the page number; the
              -- others are indented by the same width
              pageinfo = spaces
          end

          nothingWasPrinted = false
      end
  end

  -- When called recursively for the context message, msg is
  -- lastMessage itself, so it is not sent to the summaries again
  if msg ~= lastMessage then
      lastMessage = msg
      msg:toSummary()
  end
end

-- Shows, in order, every message accumulated for the current page.
function showPageMessages()
  local pending = currentPageMessages
  for _, pageMessage in ipairs(pending) do
      showMessage(pageMessage)
  end
end

-- Shows the messages that appeared after the last shipout, preceded
-- by an "After last page:" heading if at least one of them is visible.
function showRemainingMessages()
  local hasVisibleOutput = false
  for _, msg in ipairs(currentPageMessages) do
      hasVisibleOutput = trim(msg:toString()) ~= ""
      if hasVisibleOutput then break end
  end

  if hasVisibleOutput then
      local heading = "After last page:"
      print()
      if COLOR then heading = bgreen(heading) end
      print(heading)
      print()
  end

  -- this must run even when nothing is visible, because
  -- showing a message is what sends it to the summaries
  showPageMessages()
end

-- Prints the final report: the text of every non-empty summary followed
-- by the "rerun", "parse error" and "errors detected" alerts. Nothing
-- is printed if there is nothing to report.
function showSummary()
  -- the alerts alone are enough to justify a summary
  local worthShowing = SHOULD_RERUN_LATEX or ERRORS_DETECTED or PARSE_ERROR
  if not worthShowing then
      for _, summary in ipairs(summaries) do
          if trim(summary:toString()) ~= "" then
              worthShowing = true
              break
          end
      end
  end

  if not worthShowing then return end

  nothingWasPrinted = false

  -- prints an alert (in red, if colors are enabled) plus an empty line
  local function alert(txt)
      if COLOR then txt = red(txt) end
      print(txt)
      print()
  end

  if not ONLY_SUMMARY then
      print("")
      local header = "====  Summary:  ===="
      if COLOR then header = bgreen(header) end
      print(header)
      print("")
  end

  for _, summary in ipairs(summaries) do
      local text = summary:toString()
      if trim(text) ~= "" then
          -- every line but the first is indented
          local indent = ""
          for _, line in ipairs(linesToTable(text)) do
              print(indent .. line)
              indent = '    '
          end
          print("")
      end
  end

  if SHOULD_RERUN_LATEX then
      alert("** LaTeX says you should rerun **")
  end

  if PARSE_ERROR then
      alert("** texlogsieve got confused during log processing **\n"
         .. "   (did you run it from the same directory as latex?)\n"
         .. "   messages may be missing or filenames may be\n"
         .. "   incorrect, check the LaTeX logfile directly")
  end

  if ERRORS_DETECTED then
      alert("** There were errors during processing! Generated PDF is probably defective **")
  end
end

-- A spinner written to stderr to show that we are still working
-- while the output is being withheld.
heartbeat = {
    chars = {'/', '-', '\\', '|'}, -- the animation frames
    idx = 0,                       -- how many frames were drawn so far
    cnt = 0,                       -- how many times tick() was called
    startline = true,              -- true when no frame is on screen
}

-- Counts one more processed message; on every fifth call (starting
-- with the first one), draws the next frame over the previous one.
function heartbeat:tick()
  if not HEARTBEAT then return end

  local due = (self.cnt % 5 == 0)
  if due then
      if self.startline then
          self.startline = false
      else
          -- go back one column to overwrite the previous frame
          io.stderr:write('\b')
      end
      io.stderr:write(self.chars[self.idx % 4 + 1])
      self.idx = self.idx + 1
  end

  self.cnt = self.cnt + 1
end

-- Erases the frame currently on screen, if there is one.
function heartbeat:stop()
  if not HEARTBEAT then return end
  if self.startline then return end

  io.stderr:write('\b \b')
  self.startline = true
end


--[[ ##################################################################### ]]--
--[[ ########################### THE HANDLERS ############################ ]]--
--[[ ##################################################################### ]]--

-- datepat and filepat will come in handy later on.
--
-- Absolute paths may be in the form "/...blah.ext" and "C:\...blah.ext";
-- Relative paths may be in the form "./blah.ext" or "blah.ext". This last
-- form makes ordinary words almost indistinguishable from paths. Since it
-- is also possible for a path to include dots and spaces, this pattern has
-- to be *very* permissible, which means it may easily match something that
-- is not really a filename. Don't blindly trust it! guessFilename() uses
-- this but with added sanity checks.
--
-- filepat will also fail:
--
-- 1. If the path or filename includes weird characters, such as ":" or "|"
-- 2. If the file has no extension or the extension has only one character
-- 3. If the extension includes "-", "_", or spaces
-- 4. If filepat should match the end of the message and the matching line is
--    wrapped in the middle of the file extension, for example "myfile.pd\nf"
--    (but we have a hack in unwrapUntilPatternMatches() to work around that)

-- Four digits, a separator, two digits, a separator and two digits,
-- where each separator is one of "/", "-" or "." (e.g. 2021/03/15)
datepat = '%d%d%d%d[/%-%.]%d%d[/%-%.]%d%d'

-- These characters should never be part of a path. This string is
-- meant to be embedded in a "[^...]" set, as done below.
unreasonable = '%%:;,%=%*%?%|%&%$%#%!%@"%`\'%<%>%[%]%{%}%(%)'

filepat =    '[^%s%-' .. unreasonable .. ']' -- the first char
          .. ':?' -- If the first char is a drive letter
          .. '[^' .. unreasonable .. ']*' -- other (optional) chars
          .. '%.' -- the extension must exist and be at least two chars long
          .. '[^/\\ %-%_%.' .. unreasonable .. ']'
          .. '[^/\\ %-%_%.' .. unreasonable .. ']+'

-- This is even more fragile than filepat, it matches almost anything:
-- one char that is neither a space, "-" nor unreasonable, followed by
-- any number of chars that are not unreasonable.
dirpat =    '[^%s%-' .. unreasonable .. ']'
         .. '[^' .. unreasonable .. ']*'

-------------------------------------------------------------------------------
-- HandlerPrototype
-------------------------------------------------------------------------------

HandlerPrototype = {}

-- Creates a new object that inherits from the receiver (which may be
-- HandlerPrototype itself or any object previously created by new()).
function HandlerPrototype:new()
    -- lookups that fail in the new object fall back to the receiver
    self.__index = self
    return setmetatable({}, self)
end

-- Some handlers need an init() method; by inheriting from this,
-- we can simply iterate over all of them calling init().
-- This default implementation intentionally does nothing.
function HandlerPrototype:init()
end

-- Only some of the handlers use this implementation
function HandlerPrototype:unwrapLines()
  -- keep merging the following line into the current
  -- one for as long as the result still looks wrapped
  while true do
      if not Lines:seemsWrapped() then break end
      Lines:unwrapOneLine()
  end
end

-- Creates the object that will hold the message being processed by
-- the handler; by default, a plain Message.
function HandlerPrototype:newMessage()
  return Message:new()
end

-- We use this after the main loop ends (we reached the end of the
-- input file) to output any messages that have not been fully
-- processed by whatever is defined as nextHandler. This should
-- never happen, but it may if the file is truncated (which may
-- happen if there were errors).
function HandlerPrototype:flush()
  local pending = self.message
  -- nothing was left half-processed
  if pending == nil then return end

  dispatch(pending)
  self.message = nil
end

-- Checks whether joining the line at "position" with up to four of the
-- following lines makes one of self.patterns match.
--
-- Returns true plus a table with "first" (where the match starts in the
-- joined text) and "match" (the matched text), or false plus an empty
-- table. Nothing is actually unwrapped here; this only looks ahead.
function HandlerPrototype:tryUnwrapping(position)
  -- For some handlers, it makes sense to unwrap a few lines ahead to
  -- see whether this unwrapping allows us to match. However, we need to
  -- be careful: if we match on subsequent lines by themselves, then the
  -- match is not the result of unwrapping, so we should return false.

  if not badLogfile then return false, {} end

  -- "line" used to be assigned without "local", leaking into (and
  -- clobbering) the global namespace
  local line = Lines:get(position)
  if line == nil then return false, {} end

  -- Returns the start position and the text of the match
  -- for the first of our patterns that matches "text"
  local function firstMatch(text)
      for _, pat in ipairs(self.patterns) do
          local first, last = string.find(text, pat)
          if last then return first, string.sub(text, first, last) end
      end
      return nil, nil
  end

  -- Ideally, we would do "while Lines:seemsWrapped(position + offset -1)".
  -- However, in the epilogue there may be lines wrapped at the "wrong"
  -- place, so we cannot do that. Doing it like this instead should not
  -- have any ill consequences, as long as the patterns we are looking
  -- for do not end in something like ".*".

  local subsequentLines = ""
  local offset = 1
  while offset < 5 do -- unwrapping the 4 subsequent lines is enough
      local nextLine = Lines:get(position + offset)
      if not nextLine then return false, {} end

      subsequentLines = subsequentLines .. nextLine

      -- what matches without the first line...
      local _, nextMatch = firstMatch(subsequentLines)
      -- ...and what matches with it
      local first, match = firstMatch(line .. subsequentLines)

      if first then
          -- if both are the same, the match does not depend
          -- on the first line, i.e., it is not due to unwrapping
          if match == nextMatch then return false, {} end
          return true, {first = first, match = match}
      end

      offset = offset +1
  end

  return false, {}
end

-------------------------------------------------------------------------------
-- errorHandler
-- pseudoErrorHandler
--
-- errorHandler simply identifies errors and defines "ERRORS_DETECTED" as true.
-- We might get away with just detecting lines that start with "! ", but that
-- might fail with a wrapped line, so we go the extra mile to make sure this
-- is really an error. pseudoErrorHandler deals with some low-level warnings
-- generated by some engines that have a similar format to error messages.
-------------------------------------------------------------------------------

errorHandler = HandlerPrototype:new()

-- Texts of known error messages. These are not anchored because the
-- message comes after a "! " or "file:line: " prefix. They are only used
-- to decide whether the first line of an error needs to be unwrapped;
-- errors that match none of them are still handled as errors.
errorHandler.patterns = {
    -- luatex engine errors
    "error:%s+%(pdf backend%): 'endlink' ended up in different "
                .. "nesting level than 'startlink'",
    "error:%s+%(pdf backend%): \\pdfextension endlink cannot be "
                .. "used in vertical mode",
    -- pdftex engine errors
    "pdfTeX error %(ext1%): \\pdfendlink cannot be used in vertical mode%.",
    "pdfTeX error %(ext5%): cannot open file for embedding%.",
    -- basic LaTeX error messages follow these patterns
    'Package .- Error: ',
    'Class .- Error: ',
    'LaTeX Error: ',
    -- This list comes from The LaTeX Companion 2nd ed.
    'Undefined control sequence',
    'Missing .- inserted',
    'Counter too large',
    'Double %S+script',
    'Extra',
    'Font .- not loaded',
    'Illegal ',
    'Misplaced ',
    'Missing number, treated as zero',
    'Not a letter',
    'Paragraph ended before .- was completed',
    'TeX capacity exceeded',
    'Text line contains an invalid character',
    'Use of .- match its definition',
    "You can't use .- in .- mode",
    'A .- was supposed to be here',
    'Argument of .- has an extra',
    'Dimension too large',
    'File ended while',
    'Font .- not load',
    "I can't",
    'Improper',
    'Incompatible',
    'Incomplete',
    'Infinite glue',
    'Interruption',
    'Limit controls must follow',
    'No room for a new',
    'Number too big',
    'Only one %# is allowed',
    "Sorry, I can't find",
    'You already have nine',
}

-- If the line starts with "! " or with "file:line: ", returns the
-- position where this prefix ends; otherwise, returns nil.
function errorHandler:isErrorLine(line)
  local _, finish = string.find(line, '^! ')
  if finish then return finish end

  _, finish = string.find(line, '^' .. filepat .. ':%d+: ')
  return finish
end

-- If the line starts with "Runaway argument?", returns the
-- position where this text ends; otherwise, returns nil.
function errorHandler:isRunawayLine(line)
  local start, finish = string.find(line, '^Runaway argument%?')
  if start == nil then return nil end
  return finish
end

-- Checks whether joining the line at "position" with the following one
-- makes a known pattern match. Returns true and the position of the
-- following line if so; false and the original position otherwise.
function errorHandler:tryUnwrapping(position)
  if not Lines:seemsWrapped(position) then return false, position end

  local following = position +1
  local joined = Lines:get(position) .. Lines:get(following)
  for _, knownError in ipairs(self.patterns) do
      if string.find(joined, knownError) then
          return true, following
      end
  end

  return false, position
end

-- Checks whether the line at "position" (default: 0) starts an error
-- message.
--
-- Returns false plus an empty table if it does not. Otherwise, returns
-- true plus a table with "numLines" (the index of the last line that
-- belongs to the message, plus one) and, except for "Runaway argument"
-- lines, "wrapped" (whether the first line of the message continues on
-- the following line).
function errorHandler:canDoit(position)
  if position == nil then position = 0 end
  local line = Lines:get(position)
  if line == nil then return false, {} end

  -- Errors that do not start with "! " or "file:line: "
  if self:isRunawayLine(line) then return true, {numLines = 1} end

  if not self:isErrorLine(line) then return false, {} end

  -- This is (almost certainly) an error; let's check
  -- if it matches a known message (we do this to decide
  -- whether we should unwrap the first line)
  local identified = false
  for _, pat in ipairs(self.patterns) do
      if string.find(line, pat) then identified = true break end
  end

  -- This must be local and start as false: it used to be a global that
  -- was only assigned when the message was not identified, so a "true"
  -- left behind by a previous call would be returned for an identified
  -- (not wrapped) message, causing a bogus unwrap later on.
  local wrapped = false
  if not identified then wrapped, position = self:tryUnwrapping(position) end

  -- if we still could not find a match, it is an unknown error message.
  -- Things should still work ok, as long as the first line is not wrapped.
  -- If that is not the case, the remaining lines of this message will
  -- probably not be identified as such.

  -- Let's look ahead to identify the other lines that are part
  -- of the error message. We do not want to look too much ahead,
  -- so we just scan the current buffer.

  local lastline = position
  position = position +1
  -- We use "<", not "<=", so we can access the following line too
  while position < Lines:numLines() do
      local current = Lines:get(position)

      -- if there is a second error, don't look further ahead
      if self:isErrorLine(current) or self:isRunawayLine(current) then
          break
      end

      -- This is always the last line
      if string.find(current,
                     '^Type%s+H %<return%>%s+for immediate help%.')
      then
          lastline = position
          break
      end

      local length = string.len(current)
      if length > 0 then -- skip empty lines
          if string.find(Lines:get(position +1),
                         "^" .. string.rep(" ", length))
          then
              -- the next line is indented up to the end of this one,
              -- so both belong to the message; on the next iteration,
              -- check the line after that
              position = position +1
              lastline = position
          elseif string.find(current, '^l%.%d+ ') then
              lastline = position
          else
              break -- this line belongs to a different message
          end
      end

      position = position +1
  end

  -- position starts at zero, so numlines needs +1
  return true, {numLines = lastline +1, wrapped = wrapped}
end

-- If the current line starts an error message, records that
-- errors were detected and starts processing the message.
function errorHandler:handleFirstLine()
  local myTurn, data = self:canDoit()
  if myTurn then
      ERRORS_DETECTED = true
      self:reallyHandleFirstLine(data)
      return true
  end

  return false
end

-- Creates the message from the first line of the error and arranges
-- for handleLines() to process the remaining data.numLines - 1 lines.
function errorHandler:reallyHandleFirstLine(data)
  flushUnrecognizedMessages()

  -- after unwrapping, the message has one line less
  if data.wrapped then
      Lines:unwrapOneLine()
      data.numLines = data.numLines -1
  end

  local message = self:newMessage()
  message.severity = UNKNOWN
  message.content = Lines.current
  self.message = message
  Lines:handledChars()

  self.numLines = data.numLines
  self.processed = 1

  -- the next lines are ours too
  self.doit = self.handleLines
  nextHandler = self
end

errorHandler.doit = errorHandler.handleFirstLine

-- Processes one of the lines after the first of an error message;
-- once they have all been processed, dispatches the message.
function errorHandler:handleLines()
  if self.processed >= self.numLines then -- We're done!
      self.doit = self.handleFirstLine
      dispatch(self.message)
      return true
  end

  -- nil means "the whole line"
  local cutoff = nil

  local isLastLine = (self.processed == self.numLines -1)
  if isLastLine then
      -- there may be some other messages at the end of the last
      -- line; stop right before the earliest one of them
      cutoff = string.len(Lines.current)
      for _, handler in ipairs(anywhereHandlers) do
          local match, data = handler:lookahead()
          if match then
              cutoff = math.min(cutoff, data.first -1)
          end
      end
  end

  self:outputCurrentLine(cutoff)

  self.processed = self.processed +1
  nextHandler = self
  return true
end

-- Appends the current line (up to "last", if given) to the message
-- as a new line and marks these chars as handled.
function errorHandler:outputCurrentLine(last)
  -- "last" is nil in all but the last line
  local message = self.message
  local text = string.sub(Lines.current, 1, last)
  message.content = message.content .. '\n' .. text
  Lines:handledChars(last)
end

-- Inherits from errorHandler, overriding what differs
pseudoErrorHandler = errorHandler:new()

-- Unlike the errorHandler patterns, these identify the message by
-- themselves and are anchored at the start of the line.
pseudoErrorHandler.patterns = {
    '^pdfTeX warning %(ext4%): destination with the same identifier %b() '
               .. 'has been already used, duplicate ignored',
    '^pdfTeX warning: \\pdfendlink ended up in different nesting level '
               .. 'than \\pdfstartlink',
    '^warning%s+%(pdf backend%): ignoring duplicate destination with '
               .. "the name '.-'",
}

-- Checks whether the line at "position" (default: 0) starts one of the
-- low-level warnings formatted like error messages.
--
-- Returns false plus an empty table if it does not. Otherwise, returns
-- true plus a table with "numLines" (the index of the last line that
-- belongs to the message, plus one) and "wrapped" (whether the first
-- line of the message continues on the following line).
function pseudoErrorHandler:canDoit(position)
  if position == nil then position = 0 end
  local line = Lines:get(position)
  if line == nil then return false, {} end

  -- "last" used to be assigned without "local", leaking into (and
  -- clobbering) the global namespace
  local last = nil
  for _, pat in ipairs(self.patterns) do
      _, last = string.find(line, pat)
      if last then break end
  end

  -- Maybe we should unwrap this line?
  local wrapped = false
  if not last then
      wrapped, position = self:tryUnwrapping(position)
      if not wrapped then return false, {} end
  end

  local lastline = position

  -- Looks like a pseudoError; Let's look ahead to identify the other
  -- lines that are part of the message. We do not want to look too
  -- much ahead, so we just scan the current buffer.

  position = position +1
  -- We use "<", not "<=", so we can access the following line too
  while position < Lines:numLines() do
      local current = Lines:get(position)

      -- if there is a second error, don't look further ahead
      if self:isErrorLine(current) or self:isRunawayLine(current) then
          break
      end

      local length = string.len(current)
      if length > 0 then -- skip empty lines
          if string.find(Lines:get(position +1),
                         "^" .. string.rep(" ", length))
          then
              -- the next line is indented up to the end of this one,
              -- so both belong to the message; on the next iteration,
              -- check the line after that
              position = position +1
              lastline = position
          elseif string.find(current, '^l%.%d+ ') then
              lastline = position
          else
              break -- this line belongs to a different message
          end
      end

      position = position +1
  end

  -- position starts at zero, so numlines needs +1
  return true, {numLines = lastline +1, wrapped = wrapped}
end

-- Like errorHandler:handleFirstLine(), but the message is a WARNING,
-- ERRORS_DETECTED is left alone and the (unwrapped) first line is
-- also saved as the terse version of the message.
function pseudoErrorHandler:handleFirstLine()
  local myTurn, data = self:canDoit()
  if not myTurn then return false end

  -- take note of the first line before it is processed
  local firstLine = Lines.current
  if data.wrapped then
      firstLine = firstLine .. Lines:get(1)
  end

  errorHandler.reallyHandleFirstLine(self, data)

  local message = self.message
  message.severity = WARNING
  message.terseContent = firstLine
  return true
end

-- Marks the current line (up to "last", if given) as handled; the text
-- is only added to the message, indented, if MINLEVEL is at most INFO.
function pseudoErrorHandler:outputCurrentLine(last)
  -- "last" is nil in all but the last line
  local text = string.sub(Lines.current, 1, last)

  local verbose = MINLEVEL <= INFO
  if verbose then
      local message = self.message
      message.content = message.content .. '\n    ' .. text
  end

  Lines:handledChars(last)
end

pseudoErrorHandler.doit = pseudoErrorHandler.handleFirstLine

-------------------------------------------------------------------------------
-- epilogueHandler
--
-- This handles the generic messages at the end of each LaTeX run. We could
-- also handle each one with stringsHandler. WARNING! There may be lines in
-- the epilogue that are not wrapped as the rest; they may either not be
-- wrapped or they may be wrapped at "wrong" line lengths. We solve this
-- by always trying to unwrap (this is only ok because no patterns end with
-- ".*" or ".-").
-------------------------------------------------------------------------------

epilogueHandler = HandlerPrototype:new()

-- Any of these marks the start of the epilogue; they are
-- checked even before the epilogue has been detected.
epilogueHandler.beginPatterns = {
  -- This appears in the logfile but not on stdout
  "^Here is how much of .-TeX's memory you used:",
  -- apparently, pdflatex writes this on stdout:
  "^%(see the transcript file for additional information%)",
  -- while lualatex writes this on stdout:
  "^ *%d+ words of node memory still in use:",
}

-- The other lines of the epilogue; for the current
-- line, these are only checked inside the epilogue.
epilogueHandler.patterns = {
  "^%s*%d+ strings out of %d+",
  "^%s*%d+ string characters out of %d+",
  "^%s*%d+ words of memory out of %d+",
  "^%s*%d+ words of font info for %d+ fonts, out of %d+ for %d+",
  "^%s*%d+,%d+ words of node,token memory allocated",
  "^%s*%d+ words of node memory still in use:",
  "^%s*%d+ hyphenation exceptions out of %d+",

  -- This line lists several different values that may or may not
  -- be present in a given execution; hopefully this will catch
  -- all variations.
  "^%s*%d+ hlist, %d+ vlist,.*, %d+ pdf%_%S+ nodes",

  -- the list may have either twelve...
  "^%s*avail lists: %d+:%d+,%d+:%d+,%d+:%d+,%d+:%d+,%d+:%d+,%d+:%d+,"
          .. "%d+:%d+,%d+:%d+,%d+:%d+,%d+:%d+,%d+:%d+,%d+:%d+",

  -- ...or eleven items
  "^%s*avail lists: %d+:%d+,%d+:%d+,%d+:%d+,%d+:%d+,%d+:%d+,%d+:%d+,"
          .. "%d+:%d+,%d+:%d+,%d+:%d+,%d+:%d+,%d+:%d+",

  "^%s*%d+ multiletter control sequences out of %d+%+%d+",
  "^%s*%d+ fonts using %d+ bytes",
  "^%s*%d+i,%d+n,%d+p,%d+b,%d+s stack positions out of %d+i,%d+n,%d+p,%d+b,%d+s",
  "^Output written on " .. filepat .. " %(%d+ pages, %d+ bytes%)%.",
  "^Transcript written on " .. filepat .. "%.",

  -- pdftex outputs these two separately; luatex outputs both on a single line
  "^PDF statistics: %d+ PDF objects out of %d+ %(max%. %d+%)",
  "^PDF statistics:",
  "^%s*%d+ PDF objects out of %d+ %(max%. %d+%)",

  "^%s*%d+ compressed objects within %d+ object streams",
  "^%s*%d+ named destinations out of %d+ %(max%. %d+%)",
  "^%s*%d+ words of extra memory for PDF output out of %d+ %(max%. %d+%)",
}

-- Checks whether the line at "position" (default: 0) is part of the
-- epilogue. Returns true plus a table with "line" (the text of the
-- message, which may span more than one line of the input), or false
-- plus an empty table.
function epilogueHandler:canDoit(position)
  if position == nil then position = 0 end
  local line = Lines:get(position)
  if line == nil then return false, {} end

  -- true if the line matches at least one of the patterns in the list
  local function matchesAny(patterns)
      for _, pat in ipairs(patterns) do
          if string.find(line, pat) then return true end
      end
      return false
  end

  -- the start of the epilogue may be detected at any time
  if matchesAny(self.beginPatterns) then return true, {line = line} end

  -- but, for the current line, the remaining
  -- patterns only make sense inside the epilogue
  if position == 0 and not EPILOGUE then return false, {} end

  if matchesAny(self.patterns) then return true, {line = line} end

  -- no luck; maybe joining the next lines helps
  local ok, data = self:tryUnwrapping(position)
  if ok then return true, {line = data.match} end

  return false, {}
end

-- If the current line is part of the epilogue, records that the
-- epilogue has started and dispatches the line (unwrapped as needed
-- to contain the matched text) as a DEBUG message. Returns whether
-- the line was handled.
function epilogueHandler:doit()
  local myTurn, data = self:canDoit()
  if not myTurn then return false end

  EPILOGUE = true
  flushUnrecognizedMessages()
  unwrapUntilStringMatches(data.line)

  -- "msg" used to be assigned without "local", leaking into (and
  -- clobbering) the global namespace
  local msg = self:newMessage()
  msg.severity = DEBUG
  msg.content = Lines.current
  Lines:handledChars()
  dispatch(msg)
  return true
end


-------------------------------------------------------------------------------
-- fpHandler
--
-- Handles the messages output by the "fp" (fixed point) package, which
-- look like ( FP-UPN ( FP-MUL ) ( FP-ROUND ) ) etc.
--
-- They are \message's, so they may appear anywhere on a line. Usually,
-- several of such messages appear together, so line wrapping is common.
-- We handle the parens with a stack independent from the openFiles stack.
-------------------------------------------------------------------------------

fpHandler = HandlerPrototype:new()

-- Creates the stack used to keep track of the open parens
function fpHandler:init()
  self.stack = Stack:new()
end

-- The space between the open parens char and "FP" is optional
-- because it is omitted in case of line wrapping.
fpHandler.loosePattern = '%( ?FP%-[^%s%)]+'
-- The strict version only matches at the start of the line
-- (leading spaces aside); it is the one used by default.
fpHandler.strictPattern = '^%s*' .. fpHandler.loosePattern
fpHandler.pattern = fpHandler.strictPattern


-- Checks whether the current line, unwrapped as much as necessary,
-- matches self.pattern. Returns true plus a table with "first" (where
-- the match starts), or false plus an empty table. For any line other
-- than the current one, the answer is always false.
function fpHandler:canDoit(position)
  if position == nil then position = 0 end
  local text = Lines:get(position)

  -- When we are looking into the future, let's just lie: since this
  -- handler deals with several similar short messages in sequence,
  -- preventing unwrapping is a very bad idea, because it will affect the
  -- processing of the current message. As for other messages looking into
  -- the future, openParensHandler:canDoit() return value will work fine.
  if text == nil or position > 0 then return false, {} end

  local first = string.find(text, self.pattern)
  while first == nil do
      -- no match and no more text to join: give up
      if not Lines:seemsWrapped(position) then return false, {} end

      position = position +1
      text = text .. Lines:get(position)
      first = string.find(text, self.pattern)
  end

  return true, {first = first}
end

-- Like canDoit(), but looks for the message anywhere
-- in the line except at its very beginning.
function fpHandler:lookahead()
  -- the loose pattern is only in effect during this check
  self.pattern = self.loosePattern
  local found, info = self:canDoit()
  self.pattern = self.strictPattern

  if not found then return found, info end

  -- a match at the beginning of the line does not count here
  if info.first == 1 then return false, {} end

  return found, info
end

-- If the current line starts an "fp" message, creates an empty DEBUG
-- message and leaves the actual processing to process().
function fpHandler:startProcessing()
  local myTurn = self:canDoit()
  if not myTurn then return false end

  local message = self:newMessage()
  message.severity = DEBUG
  message.content = ""
  self.message = message

  self.doit = self.process
  nextHandler = self
  return true
end

fpHandler.doit = fpHandler.startProcessing

-- Processes the next "( FP-xxx" or ")" found in the current line,
-- unwrapping the line for as long as neither can be found.
function fpHandler:process()
  while true do
      local _, last = string.find(Lines.current, self.pattern)
      if last ~= nil then return self:processOpen(last) end

      _, last = string.find(Lines.current, '%s*%)')
      if last ~= nil then return self:processClose(last) end

      if not Lines:seemsWrapped() then
          -- This should never happen; if it does, we give up on this
          -- message (looping any further would never end) and start
          -- over with an empty stack
          io.stderr:write("    texlogsieve: parsing error near input line "
                               .. Lines.linenum .. " (fpHandler:process)\n")

          PARSE_ERROR = true

          dispatch(self.message)
          self.message = nil
          self.doit = self.startProcessing
          self.stack = Stack:new()
          return true
      end

      Lines:unwrapOneLine()
  end
end

-- Adds the text up to "last" (an open parens plus the operation
-- name) to the message and takes note of the open parens.
function fpHandler:processOpen(last)
  local message = self.message
  local opening = string.sub(Lines.current, 1, last)
  message.content = message.content .. opening

  Lines:handledChars(last)

  -- only the depth matters, so any value will do
  self.stack:push("DUMMY")

  nextHandler = self
  return true
end

-- Adds the text up to "last" (a close parens) to the message; when
-- this closes the outermost parens, the message is dispatched.
function fpHandler:processClose(last)
  local message = self.message
  local closing = string.sub(Lines.current, 1, last)
  message.content = message.content .. closing

  Lines:handledChars(last)

  -- a close parens without the corresponding open parens
  local popped = self.stack:pop()
  if popped == nil then
      io.stderr:write("    texlogsieve: parsing error near input line "
                           .. Lines.linenum .. " (fpHandler:processClose)\n")

      PARSE_ERROR = true
  end

  if not self.stack:empty() then
      -- still inside some parens, there is more to come
      nextHandler = self
      return true
  end

  dispatch(message)
  self.message = nil
  self.doit = self.startProcessing
  return true
end


-------------------------------------------------------------------------------
-- underOverFullBoxHandler
--
-- Handles under/overfull multiline messages. These are usually important,
-- so we want to be able to filter them specifically and also to present
-- a summary about them. Besides that, they may contain spurious "(", ")",
-- "[", and "]" characters that might confuse the program if left as
-- "unrecognized".
-------------------------------------------------------------------------------

underOverFullBoxHandler = HandlerPrototype:new()

-- Checks whether the line at "position" (default: 0) starts with
-- something like "Overfull \hbox (...)". Returns true plus a table with
-- "underover" (the text before "full"), "verthoriz" (the char before
-- "box"), "amount" (the text in parens, parens included) and "last"
-- (where the match ends), or false plus an empty table.
function underOverFullBoxHandler:canDoit(position)
  if position == nil then position = 0 end
  local line = Lines:get(position)
  if line == nil then return false, {} end

  local start, finish, kind, direction, howMuch =
          string.find(line, "^([UO][nv][de][e]?r)full \\(.)box (%b())")

  if start == nil then return false, {} end

  return true, {
      underover = kind,
      verthoriz = direction,
      amount = howMuch,
      last = finish,
  }
end

-- If the current line starts an under/overfull box message, creates the
-- message from the first line and decides what comes next: the
-- offending text followed by the closing, or just the closing. Returns
-- whether the line was handled.
function underOverFullBoxHandler:handleFirstLine()
  local myTurn, data = self:canDoit()
  if not myTurn then return false end

  flushUnrecognizedMessages()
  self.message = underOverMessage:new()
  self.message.content = string.sub(Lines.current, 1, data.last)
  self.message.underover = data.underover
  self.message.verthoriz = data.verthoriz
  self.message.amount = data.amount
  self.message.severity = WARNING
  if string.find(data.amount, 'badness 10000') then
      self.message.severity = CRITICAL
  end
  Lines:handledChars(data.last)

  -- unless stated otherwise below, there is no offending text
  self.doit = self.handleClosing

  -- The rest of the line must be one of three known variations.
  -- "_" and "last" used to be assigned without "local", leaking
  -- into (and clobbering) the global namespace.
  local _, last = string.find(Lines.current,
                              "has occurred while \\output is active")
  if last ~= nil and data.verthoriz == 'h' then
      self.doit = self.handleOffendingText
  end

  if last == nil then
      _, last = string.find(Lines.current, "in %S+ at lines %d+%-%-%d+")
      if last ~= nil then
          self.doit = self.handleOffendingText
      end
  end

  if last == nil then
      _, last = string.find(Lines.current, "detected at line %d+")
      if last ~= nil and data.verthoriz == 'h' then
          self.doit = self.handleOffendingText
      end
  end

  if last == nil then
      -- none of them: output what we have and start over
      io.stderr:write("    texlogsieve: parsing error near input line "
                           .. Lines.linenum
                           .. " (underOverFullBoxHandler:handleFirstLine)\n")

      PARSE_ERROR = true

      self.doit = self.handleFirstLine
      dispatch(self.message)
      self.message = nil
      return true
  end

  self.message.content = self.message.content
                         .. string.sub(Lines.current, 1, last)

  Lines:handledChars(last)
  nextHandler = self

  return true
end

underOverFullBoxHandler.doit = underOverFullBoxHandler.handleFirstLine

-- This is somewhat fragile: if the line is wrapped and the second part
-- of the message looks like a different message, it will be left to be
-- processed on the next iteration. This is particularly easy to happen
-- if the offending text includes something like [X] or [X.Y], which
-- looks like a shipout.
function underOverFullBoxHandler:handleOffendingText()
  -- the offending text may span several wrapped lines
  self:unwrapLines()

  local offendingText = Lines.current
  self.message.failedText = offendingText
  Lines:handledChars()

  -- only the closing is missing now
  self.doit = self.handleClosing
  nextHandler = self
  return true
end

-- Finishes the message: if the current line consists only of a pair
-- of square brackets (and whatever is inside them), it is taken as the
-- closing of the message. The message is dispatched either way.
function underOverFullBoxHandler:handleClosing()
  local _, finish, bracketed = string.find(Lines.current, '^%s*(%b[])%s*$')
  if finish then
      Lines:handledChars(finish)
      self.message.closing = bracketed
  end

  local finished = self.message
  dispatch(finished)
  self.message = nil

  -- ready for the next message
  self.doit = self.handleFirstLine
  return true
end


-------------------------------------------------------------------------------
-- stringsHandler
--
-- This is the most complex handler. It deals with predefined strings that
-- may span multiple lines. The message may start anywhere on the line (this
-- handler can be in anywhereHandlers) and may end before the end of the
-- line. In fact, depending on max_print_line, it is even possible that the
-- many lines of the message are concatenated in a single line in the log
-- or, conversely, that some lines are wrapped. So, for example, the
-- developer may have envisioned a message like
--
-- ********************
-- *    Hi there!     *
-- ********************
--
-- but this might turn to
--
-- ******************** *    Hi there!     * ********************
--
-- or
--
-- ******************
-- **
-- *    Hi there!    
--  *
-- ******************
-- **
--
-- or even
--
-- ******************
-- ** *    Hi there! 
--     * ************
-- ********
--
-- So, what we do here is to consider every line in the search pattern as a
-- segment of the message that may start in the middle of a line and/or may
-- be wrapped at the end of the line. We match each of these segments in
-- turn. Because of that, the patterns should completely match all lines in
-- the message, not just the beginning of the line followed by ".*" or ".+".
--
-- Still, if you know that a specific message or line (1) always starts
-- at the beginning of the line, (2) never gets wrapped, and (3) is never
-- followed by another message in the same line, then you can match just
-- the beginning of the line and use ".*" for the rest. There are a few
-- messages below defined like this.
--
-- Note that, when the handler is executed, the pattern *is* at the beginning
-- of the current line (in some cases, with leading spaces) because, if there
-- was any leading text originally in the line, some previous handler has
-- already removed it. This is why we can (and should!) anchor the pattern
-- with "^%s*". As before, if you do know that the line always starts at the
-- beginning of a line, you may instead anchor the pattern with "^". Either
-- way, we assume we cannot know the true length of the line, so it may be
-- wrapped at any point. Also, do not forget about lookahead(), which removes
-- the "^" anchor.
-------------------------------------------------------------------------------

stringsHandler = HandlerPrototype:new()

stringsHandler.IHandleAnywhere = false

-- Checks every entry of self.patterns (each one a table of pattern
-- lines) against the input at "position" and stops at the first one
-- that matches. Returns true plus the data table produced by
-- canDoitRecursive(), with the winning entry stored in data.pattern;
-- returns false and an empty table when nothing matches.
function stringsHandler:canDoit(position)
  local candidates = self.patterns

  for idx = 1, #candidates do
      local patternLines = candidates[idx]
      local matched, info = self:canDoitRecursive(patternLines,
                                                  position, 0, 0)

      if matched then
          info.pattern = patternLines
          return true, info
      end
  end

  return false, {}
end

-- Entry point for a new message. If one of our patterns matches the
-- current input, creates the message object, records which pattern
-- matched and hands over to handleLines(), which also becomes self.doit
-- for the following lines. Returns false if no pattern matches and true
-- otherwise.
function stringsHandler:handleFirstLine()
  local matched, info = self:canDoit()
  if not matched then return false end

  flushUnrecognizedMessages()

  local msg = self:newMessage()
  self.message = msg
  msg.severity = self.severity

  -- Patterns known to start at the beginning of a line spell out any
  -- leading spaces they expect. "Anywhere" patterns, on the other
  -- hand, may be preceded by stray spaces, which we drop here, but
  -- only on the first line: on the following ones they may be
  -- indentation or otherwise relevant.
  if self.IHandleAnywhere then
      local blanks = string.match(Lines.current, '^%s+')
      if blanks ~= nil then Lines:handledChars(#blanks) end
  end

  self.captures = {}               -- filled in by handleLines()
  self.patternLines = info.pattern -- the pattern lines that matched
  self.patternLineNumber = 1
  self.doit = self.handleLines     -- used for the next lines, if any

  -- from here on, the first line is treated like any other
  self:handleLines()

  return true
end

stringsHandler.doit = stringsHandler.handleFirstLine

-- Consumes the input corresponding to the current pattern line
-- (self.patternLines[self.patternLineNumber]), appending it to the
-- message content. After the last pattern line the message is
-- dispatched and the handler goes back to handleFirstLine; otherwise
-- the handler registers itself as nextHandler. Always returns true.
function stringsHandler:handleLines()

  local pat = self.patternLines[self.patternLineNumber]

  -- A word of caution about unwrapUntilPatternMatches(): if the
  -- pattern does not describe the whole line, it will probably not
  -- unwrap anything and, even if it does, it has no way to find the
  -- real end of the line. Conversely, a pattern ending in something
  -- like ".+" may cause needless unwrapping. Such patterns should
  -- therefore only be used for lines that can never be wrapped.
  local matchEnd, found = unwrapUntilPatternMatches(pat)
  for _, capture in ipairs(found) do
      table.insert(self.captures, capture)
  end

  if matchEnd == nil then
      local where = Lines.linenum
      io.stderr:write("    texlogsieve: parsing error near input line "
                           .. where
                           .. " (stringsHandler:handleLines)\n")

      PARSE_ERROR = true

      -- give up on this message: send what we have and start over
      dispatch(self.message)
      self.message = nil
      self.doit = self.handleFirstLine
      return true
  end

  -- trailing spaces in any line of this kind of message are
  -- safe to ignore (and may show up because of \message)
  local text = trimRight(string.sub(Lines.current, 1, matchEnd))
  if self.patternLineNumber ~= 1 then
      self.message.content = self.message.content .. '\n' .. text
  else
      self.message.content = text
  end

  Lines:handledChars(matchEnd)

  if self.patternLineNumber < #self.patternLines then
      -- there are still pattern lines to go; we want the next input too
      self.patternLineNumber = self.patternLineNumber +1
      nextHandler = self
      return true
  end

  -- that was the last pattern line: wrap up
  self:processCaptures()
  dispatch(self.message)
  self.message = nil
  self.doit = self.handleFirstLine

  return true
end

-- When a pattern takes more than one line, we handle each
-- line separately. To do that, we need to transform the
-- multiline strings of the patterns into tables where each
-- line is an element.
--
-- Two versions of every pattern are prepared:
--   * self.strictPatterns: the pattern exactly as written;
--   * self.loosePatterns: the same pattern without the leading "^"
--     anchor, which is what lookahead() uses.
-- On return, self.patterns refers to self.strictPatterns.
function stringsHandler:init()
  self.strictPatterns = {}
  self.loosePatterns = {}

  for _, pat in ipairs(self.patterns) do
      local strictLines = linesToTable(pat)
      table.insert(self.strictPatterns, strictLines)

      -- Remove the leading '^', but only if it is really there: a
      -- pattern that (by mistake) is not anchored must not lose its
      -- first character.
      local loose = pat
      if string.sub(pat, 1, 1) == '^' then
          loose = string.sub(pat, 2)
      end

      local looseLines = linesToTable(loose)
      table.insert(self.loosePatterns, looseLines)
  end

  self.patterns = self.strictPatterns
end

-- The pattern we want to check may stretch over several lines. This
-- function recursively checks each line of the pattern against the
-- corresponding input line, but only up to five lines, as that is
-- enough to make sure the pattern really matches.
--
-- Parameters:
--   patternLines: table with the pattern lines still to be checked;
--                 only patternLines[1] is matched in this call
--   position:     index of the input line, as understood by Lines:get()
--                 (nil means 0)
--   offset:       number of characters at the start of the input line
--                 that were already matched and must be skipped
--   depth:        number of pattern lines already matched
--
-- Returns a boolean and a table. On success, the table is
-- {first = <start of the match of patternLines[1]>}; note that "first"
-- is relative to the line after skipping "offset" characters. If a
-- later pattern line fails the boolean is false, although the table
-- still carries "first" - callers should check the boolean.
function stringsHandler:canDoitRecursive(patternLines,
                                             position, offset, depth)

  if position == nil then position = 0 end
  local line = Lines:get(position)
  if line == nil then return false, {} end

  -- skip what was processed in a previous iteration/recursion
  if offset > 0 then line = string.sub(line, offset +1) end

  -- the last input line appended to "line" by the unwrapping code at
  -- the end of the loop (empty while nothing was unwrapped)
  local nextline = ""

  local patternLine = patternLines[1]

  while true do
      local first, last = string.find(line, patternLine)

      -- see comment below about "nextline, patternLine"
      if first ~= nil and not string.find(nextline, patternLine) then
          -- Found it!
          if depth > 4 -- 5 lines matched, that is enough
                       or #patternLines == 1 -- no more pattern lines
                       or Lines:get(position +1) == nil -- no more input lines

          then
              return true, {first = first}
          else
              -- this line matches; check the next ones too just to make sure
              if last < string.len(line) then
                  -- continue on the same line, skip what we already processed
                  -- (if we unwrapped, "offset" is negative here, so the sum
                  -- is relative to the input line now at "position")
                  offset = last + offset
              else
                  position = position +1 -- proceed to next line...
                  offset = 0 -- ...starting at the beginning of the line
              end

              depth = depth +1
              -- drop the pattern line we have just matched
              patternLines = {table.unpack(patternLines, 2)}

              local success = self:canDoitRecursive(patternLines,
                                                    position, offset, depth)

              return success, {first = first}
          end
      end

      -- no success, but can we unwrap this line?
      if not Lines:seemsWrapped(position) then return false, {} end
      -- yep! However, we need to be careful: if we match
      -- on the next line by itself, then the match is not
      -- the result of unwrapping, so we should return false.
      -- We only return true if the match occurs only when
      -- the lines are concatenated. Do not trust that the
      -- pattern is anchored to the beginning of the line,
      -- lookahead() changes this.

      -- Everything accumulated so far precedes the line we are about
      -- to append; a negative offset records that.
      offset = -1 * string.len(line)
      -- NOTE(review): this assumes Lines:get() does not return nil
      -- when seemsWrapped() is true - confirm
      nextline = Lines:get(position +1)
      line = line .. nextline
      position = position +1
  end
end

-- Same as :canDoit(), except that the patterns are not anchored to
-- the beginning of the line (used by handleUnrecognizedMessage).
-- Notice the similarity to openCloseHandlerPrototype:lookahead().
function stringsHandler:lookahead()
  -- temporarily switch to the patterns without the "^" anchor
  self.patterns = self.loosePatterns
  local found, info = self:canDoit()
  self.patterns = self.strictPatterns

  if not found then return found, info end

  -- A match at the very beginning of the line does not
  -- count: we only care about what comes later in the line
  if info.first == 1 then return false, {} end

  return found, info
end

-- Hook called by handleLines() after the last pattern line has matched,
-- right before the message is dispatched. At that point self.captures
-- holds the values collected from unwrapUntilPatternMatches() for every
-- line of the message. The default implementation is a no-op.
function stringsHandler:processCaptures()
    -- by default, do nothing
end


-------------------------------------------------------------------------------
-- beginningOfLineDebugStringsHandler
-- beginningOfLineInfoStringsHandler
-- beginningOfLineWarningStringsHandler
-- beginningOfLineCriticalStringsHandler
-- anywhereDebugStringsHandler
-- anywhereInfoStringsHandler
-- anywhereWarningStringsHandler
-- anywhereCriticalStringsHandler
-- (from stringsHandler)
--
-- they differ from the prototype only by severity level
-- and the set of patterns to search for.
-------------------------------------------------------------------------------

-- We know these messages always start at the beginning of a line
-- Always start these patterns with "^", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- DEBUG-level messages that always start at the beginning of a line.
-- Each entry is a Lua pattern; "\n" separates the lines of multi-line
-- messages. The list is searched in order and the first match wins,
-- so an entry must never be preceded by another one matching the
-- same text.
beginningOfLineDebugStringsHandler = stringsHandler:new()
beginningOfLineDebugStringsHandler.severity = DEBUG
beginningOfLineDebugStringsHandler.patterns = {
  '^This is .*TeX, Version.*',
  '^%s*restricted system commands enabled%.',
  '^%s*entering extended mode',
  '^%s*restricted \\write18 enabled%.',
  '^%s*\\write18 enabled%.',
  '^%s*%%%&%-line parsing enabled%.',

  -- Two different ways of saying "**jobname":
  '^%*%*' .. filepat .. '$',
  -- if the jobname does not include the extension, we use the first
  -- part of filepat but also excluding the backslash character
  '^%*%*[^%%:;,%=%*%?%|%&%$%#%!%@"\\%`\'%<%>%[%]%{%}]+$',

  "^%s*%*%*%*%*%*%*%*%*%*%*%*\n"
  .. "LaTeX2e <" .. datepat .. ">.*\n"
  .. "%s*L3 programming layer <" .. datepat .. ">.*\n"
  .. "%s*%*%*%*%*%*%*%*%*%*%*%*",

  '^\\[^%s=]+=[^%s=]+', -- "\c@chapter=\count174"
  "^\\openout%d+%s*=%s*`?[^']+'?%.?",

  '^Lua module: lualibs%-extended ' .. datepat
                   .. ' %S+ ConTeXt Lua libraries %-%- extended collection%.',

  '^Lua module: lualibs ' .. datepat
                   .. ' %S+ ConTeXt Lua standard libraries%.',

  '^Lua module: fontspec ' .. datepat
                   .. ' %S+ Font selection for XeLaTeX and LuaLaTeX',

  '^Lua module: lualatex%-math ' .. datepat
                   .. ' %S+ Patches for mathematics typesetting with LuaLaTeX',

  '^Lua module: luaotfload ' .. datepat
                   .. ' %S+ Lua based OpenType font support',

  '^Lua module: microtype ' .. datepat .. ' %S+ microtype module%.',

  '^luaotfload | main : initialization completed in [%d.]+ seconds',

  '^luaotfload | init : Context OpenType loader version.*',

  '^luaotfload | init : Loading fontloader '
                   .. '["“][^"]+["”] from .-["“][^"]+["”]%.',

  '^luaotfload | db : Font names database not found, generating new one%.',

  '^luaotfload | db : This can take several minutes; please be patient%.',

  -- there may be dots in the path, so we need to
  -- anchor the final dot to the end of the line
  '^luaotfload | conf : Root cache directory is "?[^"]-"?%.$',
  '^luaotfload | db : Font names database loaded from .-%.luc%.gz',
  '^luaotfload | db : Font names database loaded from .-%.luc',
  '^luaotfload | cache : Lookup cache loaded from .-%.luc%.',

  '^Lua%-only attribute.-=%s*%S+',
  "^Inserting %b`' at position .- in %b`'%.",
  "^Inserting %b`' in %b`'%.",
  "^Removing +%b`' from %b`'%.",

  "^For additional information on amsmath, use the `%?' option%.",

  "^Loading configuration file `" .. filepat .. "'%.",
  "^contour: Using driver file `" .. filepat .. "'%.",

  '^%[Loading MPS to PDF converter %(version ' .. datepat .. '%)%.%]',


  '^See the caption package documentation for explanation%.',

  '^Lua module: luaotfload%-main ' .. datepat
                   .. ' [%d%.]+ luaotfload entry point',

  '^Lua module: luaotfload%-init ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / initialization',

  '^Lua module: luaotfload%-log ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / logging',

  '^Lua module: luaotfload%-parsers ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / filelist',

  '^Lua module: luaotfload%-configuration ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / config file reader',

  '^Lua module: luaotfload%-loaders ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / callback handling',

  '^Lua module: luaotfload%-database ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / database',

  '^Lua module: luaotfload%-unicode ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / Unicode helpers',

  '^Lua module: luaotfload%-colors ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / color',

  '^Lua module: luaotfload%-resolvers ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / resolvers',

  '^Lua module: luaotfload%-features ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / features',

  '^Lua module: luaotfload%-letterspace ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / color',

  '^Lua module: luaotfload%-embolden ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / color',

  '^Lua module: luaotfload%-notdef ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / color',

  '^Lua module: luaotfload%-auxiliary ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / auxiliary functions',

  '^Lua module: luaotfload%-multiscript ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / multiscript',

  '^Lua module: luaotfload%-harf%-define ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / database',

  '^Lua module: luaotfload%-script ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / Script helpers',

  '^Lua module: luaotfload%-harf%-plug ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / database',

  '^Lua module: luaotfload%-fallback ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / fallback',

  '^Lua module: luaotfload%-tounicode ' .. datepat
                   .. ' [%d%.]+ luaotfload submodule / tounicode',

  '^' .. string.rep('%*', 37) .. '\n'
      .. '%* Using libertinus math %*\n'
      .. string.rep('%*', 37),

  '^`inconsolata%-zi4\' v%S-, ' .. datepat
                   .. ' Text macros for Inconsolata %(msharpe%)',

  '^Requested font ".-" at [%d%.]+pt\n ?%-> ' .. filepat,
  '^Requested font ".-" scaled %d+\n ?%-> ' .. filepat,
  '^Requested font ".-" at [%d%.]+pt',
  '^Requested font ".-" scaled %d+',

  '^<QR code requested for ".-" in version .->',

  '^' .. datepat .. ' %S+ pgflibraryvectorian%.code%.tex',

  '^Custom whatsit start%_tag %= %d+',
  '^Custom whatsit end%_tag %= %d+',

  '^Lua module: spelling ' .. datepat
              .. ' v%S+ support for spell%-checking of LuaTeX documents',

  -- Usually these are warnings, but for font "nil", why bother?
  '^luaotfload | aux : font no %d+ %(nil%) does not define feature '
                            .. '.- for script .- with language %S+',

  '^luaotfload | aux : font no %d+ %(nil%) defines no feature for script %S+',

  -- From IEEEtran.cls
  '^%-%- This is a[n]? %d+ point document%.',
  '^%-%- Lines per column: %S+ %(%S+%)%.',

  '^%-%- See the "IEEEtran%_HOWTO" manual for usage information%.\n'
               .. '%-%- http://www%.michaelshell%.org/tex/ieeetran/',

  '^%-%- Using %S+ x %S+ %b() paper%.',
  '^%-%- Using %S+ output%.',
  '^%-%- Verifying Times compatible math font%.',
  '^%-%- %S+ loaded, OK%.',

  -- From libertinust1math.sty
  '^amsthm loaded',
  '^amsthm NOT loaded',

  -- refcheck
  '^options: showrefs, showcites, msgs, chckunlbld',

  "^`newtxtext' v[%d%.]+, " .. datepat .. " Text macros taking advantage of "
              .. "TeXGyre Termes and its extensions %(msharpe%)",
}


-- These messages may start anywhere in a line
-- Always start these patterns with "^%s*", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- DEBUG-level messages that may show up in the middle of a line.
-- Each entry is a Lua pattern; "\n" separates the lines of multi-line
-- messages. Since the message may be preceded by whitespace left over
-- from whatever came before it, every entry starts with "^%s*".
anywhereDebugStringsHandler = stringsHandler:new()
anywhereDebugStringsHandler.IHandleAnywhere = true
anywhereDebugStringsHandler.severity = DEBUG
anywhereDebugStringsHandler.patterns = {
  '^%s*LaTeX2e <' .. datepat .. '>\n%s*patch%s?level %S+',
  '^%s*LaTeX2e <' .. datepat .. '>\n%s*patch %S+',
  '^%s*LaTeX2e <' .. datepat .. '> patch%s?level %S+',
  '^%s*LaTeX2e <' .. datepat .. '> patch %S+',
  '^%s*LaTeX2e <' .. datepat .. '>',

  '^%s*L3 programming layer <' .. datepat .. '>\n%s*patch%s?level %S+',
  '^%s*L3 programming layer <' .. datepat .. '>\n%s*patch %S+',
  '^%s*L3 programming layer <' .. datepat .. '> patch%s?level %S+',
  '^%s*L3 programming layer <' .. datepat .. '> patch %S+',
  '^%s*L3 programming layer <' .. datepat .. '>',

  '^%s*xparse %b<>',
  '^%s*%{.*pdftex%.map%}',

  '^%s*ABD: EverySelectfont initializing macros',
  '^%s*ABD: EveryShipout initializing macros',

  '^%s*<<.-%.cmap>>',

  '^%s*' .. string.rep('%*', 65) .. '%s*\n'
         .. 'GFS%-Solomos style file by A%. Tsolomitis%s*\n'
         .. string.rep('%*', 65),

  -- <blah.jpg, id=555, [...,] 722.7pt x 722.7pt>
  '^%s*%<' .. filepat .. ', id=.- [%d%.]+pt x [%d%.]+pt%>',
  '^%s*%<use ' .. filepat .. '%>', -- <use blah.jpg>
  '^%s*%<use ' .. filepat .. ', page %d-%>', -- <use blah.jpg, page ???>

  "^%s*`Fixed Point Package', .- %(C%) Michael Mehlich",

  "^%s*`newtxmath' v%S+, " .. datepat
          .. " Math macros based originally on txfonts %(msharpe%)",

  "^%s*`newtxtt' v%S+, " .. datepat
          .. " Typewriter text macros based on txfonts %(msharpe%)",

  '^%s*%* soulpos %- computing points %- it may take a few seconds %*',

  -- package qrcode
  '^%s*<Calculating QR code for ".-" in version .->',
  '^%s*<Splitting off block %d+>',
  '^%s*<Making error block %d+>',
  '^%s*<Interleaving datablocks of length %d+\\? and %d+: [ %d,]+%.>',
  '^%s*<Interleaving errorblocks of length %d+: [ %d,]+%.>',
  '^%s*<Interleaving complete%.>',
  '^%s*<Writing data%.%.%.%s?done%.>',
  '^%s*<Applying Mask %d+%.%.%.%s?done%. Calculating penalty%.%.%.%s?penalty is %d+>',
  '^%s*<Selected Mask %d+>',
  '^%s*<Encoding and writing format string%.%.%.%s?done%.>',
  '^%s*<Encoding and writing version information%.%.%.%s?done%.>',
  '^%s*<Saving QR code to memory%.%.%.%s?done%.>',
  '^%s*<Writing QR code to aux file%.%.%.%s?done%.>',
  '^%s*<Printing matrix%.%.%.%s?done%.>',
  '^%s*<Error%-correction level increased from %S+%s?  to %S+ at no cost%.>',
  '^%s*<Inserting dummy QR code in draft mode for "[^"]+" in version .-%.>',
  '^%s*<Copying the QR code for "[^"]+" in version .- as previously calculated%.>',
  '^%s*<Reading QR code for "[^"]+" at level %S+ from aux file%.>',
  -- package "comment"
  "^%s*Special comment '[^']+'",
  "^%s*General comment '[^']+'",
  "^%s*Include comment '[^']+'",
  "^%s*Excluding comment '[^']+'",
  "^%s*Lines%-Processing comment '[^']+'",
  "^%s*Including '[^']+' comment%.",
  "^%s*Excluding '[^']+' comment%.",
  "^%s*Processing '[^']+' comment%.",
  "^%s*Comment '[^']+' writing to " .. filepat .. "%.",
  "^%s*Straight input of " .. filepat .. "%.",
  "^%s*Include comment '[^']+' up to level '[^']+'",
  -- these two used to start with a bare "^", so they were missed
  -- whenever the message was preceded by whitespace
  "^%s*Lua module%: autotype " .. datepat .. " v%S+ automatic "
                                .. "language%-specific typography",
  "^%s*Lua module%: pdnm%_nl%_manipulation " .. datepat .. " v%S+ "
                        .. "pattern driven node list manipulation",
}


-- We know these messages always start at the beginning of a line
-- Always start these patterns with "^", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- INFO-level messages that always start at the beginning of a line.
-- Each entry is a Lua pattern; "\n" separates the lines of multi-line
-- messages. Every entry must start with "^": init() removes that
-- first character to build the unanchored version of the pattern.
beginningOfLineInfoStringsHandler = stringsHandler:new()
beginningOfLineInfoStringsHandler.severity = INFO
beginningOfLineInfoStringsHandler.patterns = {
  "^%s*system commands enabled%.",
  "^Writing index file.*%.idx",
  "^Writing glossary file.*%.glo",
  "^%*geometry%* driver:.*",
  "^%*geometry%* detected driver:.*",
  "^Driver file for pgf:.*%.def",
  "^Package pgfplots: loading complementary utilities for your pgf version%.%.%.",
  "^%s*file:line:error style messages enabled%.",
  "^Applying: %b[] .- on input line %d-%.",
  "^Already applied: %b[] .- on input line %d-%.",
  "^\\%S+ = a dialect from .*",
  -- TODO: we should capture the jobname and use it here
  "^No file .-%.aux%.",
  "^No file .-%.ind%.",
  "^No file .-%.bbl%.",
  "^No file .-%.gls%.",

  "^runsystem%b()%.%.%.executed safely %(allowed%)%.",
  "^runsystem%b()%.%.%.executed%.?",

  '^luaotfload | db : Reload initiated %(formats: .-%); reason: Font ".-" not found%.',

  "^reledmac reminder:%s*\n"
        .. "%s*The number of the footnotes in this section "
        .. "has changed since the last run%.\n"
        .. "%s*You will need to run LaTeX two more times "
        .. "before the footnote placement\n"
        .. "%s*and line numbering in this section are correct%.",

  "^ ?LaTeX document class for Lecture Notes in Computer Science",

  -- Remember that order matters here!
  "^" .. string.rep("%*", 37) .. "%*?%*?\n"
          .. "%* Local config file " .. filepat .. " used\n"
          .. string.rep("%*", 37) .. "%*?%*?\n",

  "^" .. string.rep("%*", 38) .. "\n"
          .. "%*\n"
          .. "%* Local config file " .. filepat .. " used\n"
          .. "%*\n"
          .. string.rep("%*", 38) .. "\n",

  "^" .. string.rep("%*", 37) .. "%*?%*?\n"
          .. "%* Local config file " .. filepat .. " used\n"
          .. "%*\n",

  "^" .. string.rep("%=", 36) .. "\n"
          .. "%s*Local config file " .. filepat .. " used\n"
          .. string.rep("%=", 36) .. "%=?\n",

  "^Local config file " .. filepat .. " used\n",

  "^=== Package selnolig, Version %S+, Date " .. datepat .. " ===",

  "^package spelling: Info%! %d+%/%d+ total%/new %S- strings "
              .. "read from file %b''%.",
  "^package spelling: Info%! %d+%/%d+ bad%/good %S- match rules "
              .. "read from module %b''%.",

  -- Package snapshot
  '^Dependency list written on .-%.dep%.',

  -- These come from IEEEtran.cls
  '^%*%* Times compatible math font not found, forcing%.',
  '^%-%- Found %S+, loading%.',
  '^%-%- Using IEEE %S+ Society mode%.',

  '^%*%* Conference Paper %*%*\n'
      .. 'Before submitting the final camera ready copy, remember to:\n'
      .. '1%. Manually equalize the lengths of two columns on the last page\n'
      .. 'of your paper%;\n'
      .. '2%. Ensure that any PostScript and/or PDF output post%-processing\n'
      .. 'uses only Type 1 fonts and that every step in the generation\n'
      .. 'process uses the appropriate paper size%.',

  '^%*%* ATTENTION: Overriding %S+ to %S+ via %S+%.',

  '^%*%* ATTENTION: Overriding inner side margin to %S+ and '
                      .. 'outer side margin to %S+ via %S+%.',

  '^%*%* ATTENTION: Overriding top text margin to %S+ and '
                      .. 'bottom text margin to %S+ via %S+%.',

  '^%*%* ATTENTION: \\IEEEPARstart is disabled in draft mode %(line %S+%)%.',
  '^%*%* ATTENTION: Overriding command lockouts %(line %S+%)%.',

  "^%*%* ATTENTION: Single column mode is not typically used "
                                 .. "with IEEE publications%.",

  '^%*%* ATTENTION: Technotes are normally 9pt documents%.',

  -- MiKTeX auto-updates
  '^======================================================================\n'
   .. 'starting package maintenance%.%.%.\n'
   .. 'installation directory: ' .. dirpat .. '\n'
   .. 'package repository: http.+\n'
   .. 'package repository digest: [%dabcdef]+\n'
   .. 'going to download %S+ .*bytes\n'
   .. 'going to install %d+ file%(s%) %(%d package%(s%)%)\n'
   .. 'downloading http.-%.%.%.\n'
   .. '%S+, %S+ Mbit/s\n'
   .. 'extracting files from ' .. filepat .. '%.%.%.\n'
   .. '======================================================================',

  -- NOTE(review): "overriden" presumably mirrors the spelling used by
  -- the package that emits this message - confirm before "fixing" it
  "^Label %b`' newly defined as it shall be overriden\n"
   .. "although it is yet undefined",

  '^%* %* %* LNI %* %* %*',
  "^Style `ntheorem', Version %S+ <" .. datepat .. ">",
  "^`XCharter' v%S+, " .. datepat .. " Text macros for XCharter, "
                          .. "an extension of Charter %(msharpe%)",
  "^Document Style algorithmicx %S+ %- a greatly improved `algorithmic' style",

  -- The brackets around the date are literal, so they must be escaped
  -- (a bare "[" starts a character set). Note that the generic
  -- "Applying: %b[] ..." entries near the top of this list also match
  -- these messages and come first.
  "^Applying%: %[" .. datepat .. "%] Usage of raw or classic option list "
                                      .. "on input line %S+%.",
  "^Already applied%: %[" .. datepat .. "%] Usage of raw or classic "
                                      .. "option list on input line %S+%.",
}


-- These messages may start anywhere in a line
-- Always start these patterns with "^%s*", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- INFO-level messages that may show up in the middle of a line (mostly
-- the banners printed when hyphenation patterns are loaded). Each
-- entry is a Lua pattern and the first one that matches wins.
anywhereInfoStringsHandler = stringsHandler:new()
anywhereInfoStringsHandler.IHandleAnywhere = true
anywhereInfoStringsHandler.severity = INFO
anywhereInfoStringsHandler.patterns = {
  -- TODO: there are other "... patterns for blah blah"
  --       in texmf-dist/tex/generic/hyph-utf8/loadhyph
  "^%s*German Hyphenation Patterns %(Traditional Orthography%) "
                            .. "`dehyphts?%-x' " .. datepat .. " %(WL%)",

  '^%s*UTF%-8 German hyphenation patterns %(.- orthography%)',
  '^%s*EC German hyphenation patterns %(.- orthography%)',
  -- NOTE(review): this shorter pattern comes before the longer
  -- "... Orthography.-%) `dehyphn%-x' ..." one below; when the text in
  -- parentheses ends right after "Orthography", this one wins and the
  -- rest of the banner is left unhandled - confirm that the order is
  -- intended
  '^%s*German Hyphenation Patterns %(.- Orthography%)',

  "^%s*Swiss%-German Hyphenation Patterns %(.- Orthography%) "
                            .. "`dehyphts?%-x' " .. datepat .. " %(WL%)",

  "^%s*German Hyphenation Patterns %(.- Orthography.-%) "
                            .. "`dehyphn%-x' " .. datepat .. " %(WL%)",

  '^%s*UTF%-8 Swiss%-German hyphenation patterns %(.- orthography%)',
  '^%s*EC Swiss%-German hyphenation patterns %(.- orthography%)',
  '^%s*Swiss%-German Hyphenation Patterns %(.- Orthography%)',
  "^%s*dehyph%-exptl: using a TeX engine with native UTF%-8 support%.",

  '^%s*ASCII Hyphenation patterns for American English',
  '^%s*UTF%-8 %S+ hyphenation patterns',
  '^%s*EC %S+ hyphenation patterns',

  -- package qrcode
  "^%s*<Requested QR version '[^']+' is too small for desired "
              .. "text%.%s?Version increased to '[^']+' to fit text%.>",

}


-- We know these messages always start at the beginning of a line
-- Always start these patterns with "^", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- WARNING-level messages that always start at the beginning of a line.
-- Each entry is a Lua pattern; "\n" separates the lines of multi-line
-- messages. The first entry that matches wins.
beginningOfLineWarningStringsHandler = stringsHandler:new()
beginningOfLineWarningStringsHandler.severity = WARNING
beginningOfLineWarningStringsHandler.patterns = {
  '^luaotfload | aux : font no .- does not define feature '
                            .. '.- for script .- with language %S+',

  '^luaotfload | aux : font no .- %b() defines no feature for script %S+',
  '^luaotfload | aux : no font with id %d+',
  '^luaotfload | resolve : sequence of 3 lookups yielded nothing appropriate%.',

  -- the two spaces after "warning" are part of the pattern
  "^warning  %(pdf backend%): ignoring duplicate destination "
                            .. "with the name '.-'",

  "^Couldn't patch \\%S+",
  "^Invalid UTF%-8 byte or sequence at line %d+ replaced by U%+FFFD%.",

  "^pdfTeX warning %(dest%): name%b{} has been referenced but does not "
                            .. "exist, replaced by a fixed one",

  "^Unknown feature %b`' in font %b`'%.\n"
                            .. ' %-> ' .. filepat,

  -- From IEEEtran.cls
  "^%*%* WARNING: %S+ mode specifiers after the first in %b`' "
                                    .. "ignored %(line %S+%)%.",

  "^%*%* WARNING: IEEEeqnarraybox position specifiers after "
                .. "the first in %b`' ignored %(line %S+%)%.",

  "^%*%* WARNING: IEEEeqnarray predefined inter%-column glue type "
                .. "specifiers after the first in %b`' ignored %(line %S+%)%.",

  -- the line break after "peerreviewca" is part of the pattern
  "^%*%* WARNING: \\and is valid only when in conference or peerreviewca\n"
                .. "modes %(line %S+%)%.",

  '^%*%* WARNING: Ignoring useless \\section in Appendix %(line %S+%)%.',

  '^%*%* WARNING: IEEEPARstart drop letter has zero height%! %(line %S+%)\n'
                           .. ' Forcing the drop letter font size to 10pt%.',

  '^%*%* WARNING: \\IEEEPARstart is locked out for technotes %(line %S+%)%.',
  '^%*%* WARNING: %S+ is locked out when in conference mode %(line %S+%)%.',
  '^%*%* ATTENTION: %S+ is deprecated %(line %S+%)%. Use %S+ instead%.',

  '^%a+TeX warning: %a+tex %(file ' .. filepat .. '%): PDF inclusion: '
               .. 'multiple pdfs with page group included in a single page',
}


-- These messages may start anywhere in a line
-- Always start these patterns with "^%s*", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- WARNING-level messages that may show up in the middle of a line.
-- No such pattern is registered at the moment.
anywhereWarningStringsHandler = stringsHandler:new()
anywhereWarningStringsHandler.severity = WARNING
anywhereWarningStringsHandler.IHandleAnywhere = true
anywhereWarningStringsHandler.patterns = {}


-- We know these messages always start at the beginning of a line
-- Always start these patterns with "^", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- CRITICAL-level messages that always start at the beginning of a
-- line. Each entry is a Lua pattern; "\n" separates the lines of
-- multi-line messages. The first entry that matches wins.
beginningOfLineCriticalStringsHandler = stringsHandler:new()
beginningOfLineCriticalStringsHandler.severity = CRITICAL
beginningOfLineCriticalStringsHandler.patterns = {
  "^The control sequence at the end of the top line\n"
    .. "of your error message was never \\def'ed%. If you have\n"
    .. "misspelled it %(e%.g%., `\\hobx'%), type `I' and the correct\n"
    .. "spelling %(e%.g%., `I\\hbox'%)%. Otherwise just continue,\n"
    .. "and I'll forget about whatever was undefined%.",

  -- the leading and trailing spaces in each line are part of the pattern
  "^ ======================================= \n"
    .. " WARNING WARNING WARNING \n"
    .. " " .. string.rep('%-', 39) .. " \n"
    .. " The ligature suppression macros of the \n"
    .. " selnolig package %*require%* LuaLaTeX%. \n"
    .. " Because you're NOT running this package \n"
    .. " under LuaLaTeX, ligature suppression \n"
    .. " %*can not%* be performed%. \n"
    .. "=========================================",

  -- From IEEEtran.cls
  "^%*%* No Times compatible math font package found%. "
                            .. "newtxmath is required%.",
}


-- These messages may start anywhere in a line
-- Always start these patterns with "^%s*", see lookahead().
-- Order matters! The first match wins, so the longer ones should come first.
-- CRITICAL-level messages that may show up in the middle of a line.
-- No such pattern is registered at the moment.
anywhereCriticalStringsHandler = stringsHandler:new()
anywhereCriticalStringsHandler.severity = CRITICAL
anywhereCriticalStringsHandler.IHandleAnywhere = true
anywhereCriticalStringsHandler.patterns = {}


-------------------------------------------------------------------------------
-- missingCharHandler
-- (from stringsHandler)
--
-- this differs from the prototype by severity level, the set of
-- patterns to search for, and because it uses a different kind of
-- Message object (we want to summarize missing characters specially)
-------------------------------------------------------------------------------

missingCharHandler = stringsHandler:new()
missingCharHandler.severity = CRITICAL
missingCharHandler.patterns = {
  '^Missing character: There is no .- in font .*!',
}

-- Missing characters get a Message object of their own
-- kind instead of whatever the prototype would create.
missingCharHandler.newMessage = function(self)
  return missingCharMessage:new()
end


-------------------------------------------------------------------------------
-- genericLatexHandler
--
-- Messages generated by GenericInfo, PackageWarning etc., such as
--
--   Package babel Info: \l@canadian = using hyphenrules for english
--   (babel)             (\language0) on input line 102.
--
-- or
--
--   LaTeX Info: Redefining \setlength on input line 80.
--
-- These messages always start at the beginning of a line and end at the
-- end of a line. There may be more than one line; subsequent lines have
-- a specific prefix derived from the package/class name (which can be
-- obtained from the first line). We look for this prefix to detect such
-- subsequent lines.
-------------------------------------------------------------------------------

genericLatexHandler = HandlerPrototype:new()

-- Patterns for the first line of a message. Each one has three
-- captures which unpackData() reads, in this order, as "what"
-- (Package, Class...), "name" and "severity".
genericLatexHandler.patterns = {
  "^(Package)%s+(%S+)%s+(%S+): ",
  "^(Class)%s+(%S+)%s+(%S+): ",
  "^(LaTeX)%s+(%S+)%s+(%S+): ",
  "^(Module)%s+(%S+)%s+(%S+): ", -- Only ever saw "Module microtype Info"
  "^(Package)%s+(%S+)%s+(notification) ", -- pgfplots does this
}

-- Checks whether the input line at "position" (default: 0) matches one
-- of self.patterns. On success, returns true and a table holding the
-- end position of the match followed by the captures of the pattern;
-- otherwise, returns false and an empty table.
function genericLatexHandler:canDoit(position)
  if position == nil then position = 0 end

  local line = Lines:get(position)
  if line == nil then return false, {} end

  for _, candidate in ipairs(self.patterns) do
      -- This prototype may be reused with other patterns, so we
      -- cannot know how many captures to expect: collect whatever
      -- string.find() returns in a table
      local found = {string.find(line, candidate)}

      -- found[1] and found[2] are where the match starts and ends
      if found[2] ~= nil then
          table.remove(found, 1) -- callers do not need the start
          return true, found
      end
  end

  return false, {}
end

-- Fills in self.message with what canDoit() found: "data" holds the
-- end of the match, followed by the "what", "name" and "severity"
-- captures. Also computes the prefix of the continuation lines and
-- stores the (unwrapped) current line as the message content.
function genericLatexHandler:unpackData(data)
  local last, what, name, severity = table.unpack(data, 1, 4)

  self.message.what = what
  self.message.name = name
  self.message.severity = self:parseSeverity(severity)

  -- findPrefix() leaves its result in self.prefix
  self:findPrefix(last, name, what)
  self.message.prefix = self.prefix

  self:unwrapLines()
  self.message.content = Lines.current
end

-- Handle the first line of a message. Returns false (and does nothing)
-- if the current line does not match any of self.patterns; otherwise
-- creates self.message, consumes the current line, arranges for
-- handleOtherLines() to be called for the next line, and returns true.
function genericLatexHandler:handleFirstLine()
  local myTurn, data = self:canDoit()
  if not myTurn then return false end

  flushUnrecognizedMessages()

  -- erase any previous values; nil is not a good idea! If one of these
  -- is nil in a derived object, the object may grab the value of the
  -- parent object from some previous message.
  self.linenum = ""
  self.prefix = ""

  -- newMessage() is overridden by some derived handlers
  -- to create dedicated message types
  self.message = self:newMessage()
  self.message.content = ""
  self.message.severity = self.severity
  self:unpackData(data) -- process whatever canDoit() gave us

  self:extractLinenum()
  Lines:handledChars()

  -- from now on (until the message ends), we are in "continuation
  -- line" mode and we want to be the first to see the next line
  self.doit = self.handleOtherLines
  nextHandler = self
  return true
end

-- initial state: waiting for the first line of a message
genericLatexHandler.doit = genericLatexHandler.handleFirstLine

-- Handle the lines that follow the first line of a message.
--
-- If the current line starts with the continuation prefix, it is
-- appended to the message and we ask to be called again for the next
-- line. Otherwise the message is complete: we record the input line
-- number (if one was found), compute the "terse" version of the content
-- (the text before the "on input line N." / "on line N" suffix),
-- dispatch the message, and go back to waiting for a first line.
--
-- Always returns true.
function genericLatexHandler:handleOtherLines()
  local _, last = string.find(Lines.current, '^' .. self.prefix)

  if last ~= nil then
      self:unwrapLines()
      Lines:handledChars(last)
      self:extractLinenum()

      self.message.content = self.message.content .. '\n' .. Lines.current

      Lines:handledChars()
      nextHandler = self
      return true
  end

  -- Not a continuation line: the message is over
  if self.linenum ~= "" then
      self.message.linenum = self.linenum

      -- This table used to be an accidental global ("tmp");
      -- it is only needed here, so keep it local
      local suffixes = {
          " on input line " .. self.linenum .. "%.",
          " on line " .. self.linenum .. "$"
      }

      for _, pat in ipairs(suffixes) do
          local first = string.find(self.message.content, pat)
          if first then
              self.message.terseContent = string.sub(self.message.content,
                                                     1, first -1)
              break
          end
      end
  end

  self.doit = self.handleFirstLine
  dispatch(self.message)
  self.message = nil

  return true
end

-- Build the pattern that identifies continuation lines and store it in
-- self.prefix. Continuation lines look like
--
--   ^(pkgname) [padding spaces] blah blah
--
-- so that the text is aligned with the text of the first line, which
-- starts right after column "lastcol". When there is no name, the
-- prefix consists of spaces only.
function genericLatexHandler:findPrefix(lastcol, name, what)
  local tag = ""
  local padding = lastcol

  if name ~= nil then
      local wrapped = "(" .. name .. ")"
      padding = lastcol - string.len(wrapped)
      tag = protect_metachars(wrapped)
  end

  self.prefix = tag .. string.rep(" ", padding)
end

-- Look for an input line number in the current line and store it (as a
-- string) in self.linenum. Does nothing if a line number has already
-- been found for this message; leaves self.linenum as "" if the current
-- line has none.
function genericLatexHandler:extractLinenum()
  if self.linenum ~= "" then return end

  -- string.match() returns the capture directly; the previous
  -- "_, _, self.linenum = string.find(...)" form wrote to the global "_"
  local num = string.match(Lines.current, "on input line (%d+)%.")

  -- LaTeX3-style messages (with \msg_something)
  if num == nil then
      num = string.match(Lines.current, "on line (%d+)$")
  end

  self.linenum = num or ""
end

-- Translate the severity word found in a message into one of the
-- severity constants. A missing or empty word yields the handler's
-- default severity; an unknown word yields UNKNOWN. The comparison
-- is case-insensitive.
function genericLatexHandler:parseSeverity(severity)
  if severity == nil or severity == "" then return self.severity end

  -- Words that are treated as INFO:
  -- tocbibind uses "Note"
  -- floatflt uses "Message"
  local informative = {
      info = true,
      notification = true,
      note = true,
      message = true,
  }

  local word = string.lower(severity)

  if informative[word] then return INFO end
  if word == 'warning' then return WARNING end
  return UNKNOWN
end

-- Similar to HandlerPrototype:unwrapLines(), but with extra checks:
-- keep joining the next line to the current one while the current
-- line seems to be wrapped, unless we find evidence that it is not.
function genericLatexHandler:unwrapLines()
  while true do
      if not Lines:seemsWrapped() then return end

      -- The current line is the right length and other handlers do
      -- not know how to handle the next line, but the next line might
      -- still be a "normal" continuation line
      if string.find(Lines:get(1), '^' .. self.prefix) then return end

      -- Ok, this is almost certainly a wrapped line, but it does
      -- not hurt to also check this just in case
      if string.find(Lines.current, 'on input line %d+%.$') then return end

      Lines:unwrapOneLine()
  end
end


-------------------------------------------------------------------------------
-- latex23MessageHandler
-- genericLatexVariantIHandler
-- genericLatexVariantIIHandler
-- (from genericLatexHandler)
--
-- They differ from the prototype by the set of patterns to search for and by
-- the unpackData() method, which deals with the specific pattern captures
-- defined in the search patterns.
-------------------------------------------------------------------------------

-- Messages that start with "LaTeX <severity>: " or "LaTeX3 <severity>: "
latex23MessageHandler = genericLatexHandler:new()

latex23MessageHandler.patterns = { '^(LaTeX3?)%s+(%S+): ' }

-- data = { last matched column, what ("LaTeX" or "LaTeX3"), severity }
function latex23MessageHandler:unpackData(data)
  local lastcol, what, severity = data[1], data[2], data[3]

  self.message.severity = self:parseSeverity(severity)
  self.message.what = what

  -- When the message looks like "LaTeX...", there is no package
  -- name, but with "LaTeX3...", that is used as the name
  local name = nil
  if what == 'LaTeX3' then name = what end

  self:findPrefix(lastcol, name, what)
  self.message.prefix = self.prefix

  self:unwrapLines()
  self.message.content = Lines.current
end

-- Variant in which the input line number is part of the first-line
-- prefix: "Package <name> <severity> on input line <N>: "
genericLatexVariantIHandler = genericLatexHandler:new()

genericLatexVariantIHandler.patterns = {
  '^(Package) (%S+) (%S+) on input line (%S+): ',
}

-- data = { last matched column, what, name, severity, line number }
function genericLatexVariantIHandler:unpackData(data)
  local lastcol, what, name, severity = data[1], data[2], data[3], data[4]

  -- the line number comes straight from the pattern, so
  -- extractLinenum() will have nothing left to do
  self.linenum = data[5]

  self.message.what = what
  self.message.name = name
  self.message.severity = self:parseSeverity(severity)

  self:findPrefix(lastcol, name, what)
  self.message.prefix = self.prefix

  self:unwrapLines()
  self.message.content = Lines.current
end

-- Variant for messages of the form "Library (<name>): "; there is no
-- severity word, so these are always INFO.
-- Only ever saw "Library (tcolorbox):"
genericLatexVariantIIHandler = genericLatexHandler:new()

genericLatexVariantIIHandler.patterns = {
  '^(Library) (%(%S+%)): ',
}

-- data = { last matched column, what, name }; note that the
-- captured name includes the surrounding parentheses
function genericLatexVariantIIHandler:unpackData(data)
  local lastcol, what, name = data[1], data[2], data[3]

  self.message.what = what
  self.message.name = name
  self.message.severity = INFO

  self:findPrefix(lastcol, name, what)
  self.message.prefix = self.prefix

  self:unwrapLines()
  self.message.content = Lines.current
end


-------------------------------------------------------------------------------
-- citationHandler
-- referenceHandler
-- labelHandler
-- unusedLabelHandler
-- (from genericLatexHandler)
--
-- They differ from the prototype by the set of patterns to search for, by
-- the unpackData() method, and by the message type generated, because we
-- want to generate dedicated summaries for them
-------------------------------------------------------------------------------
-- Undefined citations. The key may be quoted either as 'key' or as
-- `key', and the page number may or may not be present.
citationHandler = genericLatexHandler:new()

citationHandler.patterns = {
  "^(LaTeX)%s+(Warning): (Citation) ('.-') on page (.-) undefined",
  "^(LaTeX)%s+(Warning): (Citation) (%b`') on page (.-) undefined",
  "^(LaTeX)%s+(Warning): (Citation) ('.-') undefined",
  "^(LaTeX)%s+(Warning): (Citation) (%b`') undefined",
}

-- data = { last matched column, what, severity, name, quoted key, page };
-- the page is absent (nil) for the patterns that do not capture it
function citationHandler:unpackData(data)
  local lastcol, what, severity, name = data[1], data[2], data[3], data[4]
  local quoted, page = data[5], data[6]

  self.message.what = what
  self.message.name = name
  self.message.severity = self:parseSeverity(severity)
  self.message.key = string.sub(quoted, 2, -2) -- remove quotes
  self.message.page = page

  -- no name here: continuation lines are prefixed by spaces only
  self:findPrefix(lastcol, nil, what)
  self.message.prefix = self.prefix

  self:unwrapLines()
  self.message.content = Lines.current
end

-- Use a dedicated message type
function citationHandler:newMessage()
  local msg = citationMessage:new()
  return msg
end

-- Undefined references; reuses citationHandler:unpackData(), as the
-- captures have the same layout, but uses its own message type.
referenceHandler = citationHandler:new()

referenceHandler.patterns = {
  "^(LaTeX)%s+(Warning): (Reference) (%b`') on page (.-) undefined"
}

function referenceHandler:newMessage()
  local msg = referenceMessage:new()
  return msg
end

-- Multiply defined labels; reuses citationHandler:unpackData() (there
-- is no page capture here), but uses its own message type.
labelHandler = citationHandler:new()

labelHandler.patterns = {
  "^(LaTeX)%s+(Warning): (Label) (%b`') multiply defined"
}

function labelHandler:newMessage()
  local msg = labelMessage:new()
  return msg
end

-- Unused labels. These messages are generated by the package refcheck
unusedLabelHandler = genericLatexHandler:new()

unusedLabelHandler.patterns = {
  "^(Package)%s+(refcheck)%s+(Warning): Unused label (%b`')"
}

-- data = { last matched column, what, name, severity, quoted key }
function unusedLabelHandler:unpackData(data)
  local lastcol, what, name, severity = data[1], data[2], data[3], data[4]
  local quoted = data[5]

  self.message.what = what
  self.message.name = name
  self.message.severity = self:parseSeverity(severity)
  self.message.key = string.sub(quoted, 2, -2) -- remove quotes

  self:findPrefix(lastcol, name, what)
  self.message.prefix = self.prefix

  self:unwrapLines()
  self.message.content = Lines.current
end

-- Use a dedicated message type
function unusedLabelHandler:newMessage()
  local msg = unusedLabelMessage:new()
  return msg
end

-------------------------------------------------------------------------------
-- providesHandler
-- (from genericLatexHandler)
--
-- Handles the lines generated by the \Provides* LaTeX commands, such as:
--
--   Package: etoolbox 2020/10/05 v2.5k e-TeX tools for LaTeX (JAW)
--
-- or
--
--   File: size11.clo 2020/04/10 v1.4m Standard LaTeX file (size option)
--
-- There is code to specifically handle a hardcoded line break
-- in a few packages (atbegshi-ltx, atveryend-ltx etc.)
-------------------------------------------------------------------------------

-- Handler for the lines generated by the \Provides* commands;
-- these messages are always INFO.
providesHandler = genericLatexHandler:new()
providesHandler.severity = INFO

-- The captures are (what, name)
providesHandler.patterns = {
  '^(Document Class):%s+(%S+)%s+',
  '^(Package):%s+(%S+)%s+',
  '^(File):%s+(%S+)%s+',
  -- the next two come from babel
  '^(Language):%s+(%S+)%s+',
  '^(Dictionary):%s+(%S+)%s+'
}

-- data = { last matched column, what, name }. Besides filling in the
-- message, this deals with the hardcoded line break that precedes
-- "with kernel methods" in a few packages: if the next line contains
-- that text, it is joined to the message and consumed.
function providesHandler:unpackData(data)
  self.message.what = data[2]
  self.message.name = data[3]

  -- There are no continuation lines of this kind for
  -- these messages, but the generic code still wants
  -- to check for the prefix.
  self.prefix = '[^%s%S]+' -- nothing matches

  self:unwrapLines()
  local text = Lines.current

  if not Lines:empty() then
      local following = Lines:get(1)
      if string.find(following, 'with kernel methods') then
          text = text .. ' ' .. following
          Lines:gotoNextLine()
      end
  end

  -- normalize the whitespace before "with kernel methods"; the
  -- parentheses discard the substitution count returned by gsub
  self.message.content = (string.gsub(text, '%s*with kernel methods',
                                      ' with kernel methods'))
end


-------------------------------------------------------------------------------
-- geometryDetailsHandler
-- (from genericLatexHandler)
--
-- Handles the output from the geometry package with the "verbose" option.
-- Inheriting from genericLatexHandler is a little hackish, since
-- the message does not really fit the "standard" message format (we define
-- the prefix statically instead of computing it from the message itself),
-- but works well and is simple.
-------------------------------------------------------------------------------

-- Handler for the "verbose" output of the geometry package;
-- these messages are always INFO.
geometryDetailsHandler = genericLatexHandler:new()
geometryDetailsHandler.severity = INFO

geometryDetailsHandler.patterns = { '^%*geometry%* verbose mode' }

-- The pattern has no captures, so "data" carries nothing useful here:
-- the name and the continuation prefix are fixed.
function geometryDetailsHandler:unpackData(data)
  local fixedPrefix = '* '

  self.message.name = 'geometry'
  self.prefix = fixedPrefix
  self.message.prefix = fixedPrefix

  self:unwrapLines()
  self.message.content = Lines.current
end

-------------------------------------------------------------------------------
-- openParensHandler
-- closeParensHandler
-- openSquareBracketHandler
-- closeSquareBracketHandler
--
-- These determine whether the chars "(", ")", "[", and "]" are ordinary
-- characters or if they indicate open/close file or begin/end shipout.
-- Detecting files allows us to filter the output according to which file
-- is being processed; detecting shipouts allows us to indicate the page
-- that a message originated from.
--
-- Although openParensHandler is very similar to openSquareBracketHandler
-- and closeParensHandler is very similar to closeSquareBracketHandler,
-- we decided not to use inheritance to reuse the code (except for the
-- lookahead() method). We would avoid a little repetition, but the code
-- size would probably be the same and it would be more complex.
--
-- The fact that the code is similar may seem a little odd at first: While
-- the openFiles stack may contain many entries (because an open file may
-- request to open another file etc.), the same is obviously not true for
-- the shipouts stack. Still, we use a stack too because we need to handle
-- "[" and "]" characters that do not correspond to real shipouts, just
-- like we do with "(" and ")".
-------------------------------------------------------------------------------

-- Common prototype for the four open/close handlers
openCloseHandlerPrototype = HandlerPrototype:new()

-- Derive the search patterns from self.openChar and self.closeChar
-- (escaped with "%", so they are always taken literally) and select
-- the strict (anchored) pattern as the active one.
function openCloseHandlerPrototype:init()
    local escapedOpen = '%' .. self.openChar
    local escapedClose = '%' .. self.closeChar

    self.openPat = escapedOpen
    self.closePat = escapedClose
    self.openOrClosePat = '[' .. escapedOpen .. escapedClose .. ']'
    self.pattern = self.strictPattern
end

-- Just like :canDoit(), but does not anchor patterns to the
-- beginning of the line (used by handleUnrecognizedMessage).
-- Notice the similarity to stringsHandler:lookahead().
--
-- Returns the same pair of values as canDoit(), except that a match
-- that starts at the first column is reported as "no match"
-- (false and an empty table).
function openCloseHandlerPrototype:lookahead()
    -- temporarily switch to the unanchored pattern
    self.pattern = self.loosePattern
    -- These must be local: they used to leak into the global
    -- namespace as "match" and "data"
    local match, data = self:canDoit()
    self.pattern = self.strictPattern

    -- Only return true if whatever matches
    -- is not at the beginning of the line
    if match and data.first == 1 then return false, {} end

    return match, data
end

-- Detects "(" characters, which may indicate that a file was opened
openParensHandler = openCloseHandlerPrototype:new()

openParensHandler.name = 'openParensHandler'
openParensHandler.openChar, openParensHandler.closeChar = '(', ')'

-- optional spaces (captured) followed by the open parens character;
-- the strict version only matches at the beginning of the line
openParensHandler.loosePattern = '(%s*)%' .. openParensHandler.openChar
openParensHandler.strictPattern = '^' .. openParensHandler.loosePattern

-- Check whether the line at "position" (default: 0, the current line)
-- matches self.pattern, i.e., has an open parens character.
--
-- On success, returns true and a table with the column where the match
-- starts ("first") and the filename guessed by guessFilename()
-- ("filename", which may be nil). Otherwise, returns false and an
-- empty table.
function openParensHandler:canDoit(position)
  if position == nil then position = 0 end

  local line = Lines:get(position)
  if line == nil then return false, {} end

  local first, last = string.find(line, self.pattern)
  if first == nil then return false, {} end

  -- whatever comes after the open parens character
  local rest = string.sub(line, last +1)

  local filename = guessFilename(position)

  -- HACK ALERT: if position > 0, we are "looking into the future"
  -- trying to figure out whether to unwrap some line. If there is
  -- an open parens with no filename here, we should normally return
  -- "true". However, if a close parens exists later on the same line,
  -- unwrapping is probably a good idea, so we will lie in this case
  -- and say we cannot handle the line. This is not a problem: when
  -- the time comes to actually process this line, either both the
  -- open and close parens will be embedded in a larger, known message
  -- (because of the unwrapping) or we will handle them using the
  -- "DUMMY" entry in the stack as usual.
  local lookingAhead = position > 0
  if lookingAhead and filename == nil
                  and string.find(rest, self.closePat) then
      return false, {}
  end

  return true, {first = first, filename = filename} -- might be nil
end

-- Process an open parens character at the beginning of the current line.
-- Returns false if there is none; otherwise consumes it and returns true.
--
-- If canDoit() provided a filename, the filename is consumed too, it is
-- pushed onto the openFiles stack, and an "open file" message is
-- dispatched. Otherwise, the character is treated as ordinary text: it
-- goes to the unrecognized buffer and a "DUMMY" entry is pushed onto
-- the stack to pair up with the corresponding close parens.
function openParensHandler:doit()
  local myTurn, data = self:canDoit()
  if not myTurn then return false end

  -- the spaces before the open parens are not ours: pass them on
  local _, last, spaces = string.find(Lines.current, self.pattern)
  if spaces == nil then spaces = "" end
  unrecognizedBuffer = unrecognizedBuffer .. spaces

  -- skip the spaces and the open parens character
  Lines:handledChars(last)

  if data.filename ~= nil then
      flushUnrecognizedMessages()
      -- the filename may be wrapped over more than one line
      local last = unwrapUntilStringMatches(data.filename)
      if last == nil then
          io.stderr:write("    texlogsieve: parsing error near input line "
                               .. Lines.linenum
                               .. " (" .. self.name .. ":doit)\n")

          PARSE_ERROR = true
      else
          Lines:handledChars(last)
      end
      -- a "DUMMY" entry at the top of the stack is
      -- discarded before the real file is pushed
      if openFiles:peek() == "DUMMY" then openFiles:pop() end
      openFiles:push(data.filename)
      -- the current file changed, so update the global "mute" flag
      mute = currentFileIsSilenced()
      local msg = openFileMessage:new()
      msg.severity = DEBUG
      msg.filename = data.filename
      msg.content = "Open file " .. data.filename
      dispatch(msg)
  else
      openFiles:push("DUMMY")
      unrecognizedBuffer = unrecognizedBuffer .. self.openChar
  end

  return true
end

-- Detects ")" characters, which may indicate that a file was closed
closeParensHandler = openCloseHandlerPrototype:new()

closeParensHandler.name = 'closeParensHandler'
closeParensHandler.openChar, closeParensHandler.closeChar = '(', ')'

-- optional spaces (captured) followed by the close parens character;
-- the strict version only matches at the beginning of the line
closeParensHandler.loosePattern = '(%s*)%' .. closeParensHandler.closeChar
closeParensHandler.strictPattern = '^' .. closeParensHandler.loosePattern

-- In lookahead, when we say "we can do it" we actually mean "well,
-- we might be able to do it". This is not a problem: it simply
-- causes handleUnrecognizedMessage to not immediately include the
-- close parens in the unrecognized buffer, leaving it in place to
-- be processed at the next iteration. In this next iteration, it
-- will be at the start of the line, allowing us to examine things
-- more carefully.
-- Returns true and { first = column } if the current line has a close
-- parens character (possibly preceded by spaces) somewhere other than
-- at its very beginning; otherwise, returns false and an empty table.
function closeParensHandler:lookahead()
  local line = Lines:get(0)

  if line ~= nil then
      local first = string.find(line, self.loosePattern)
      -- Only return true if whatever matches
      -- is not at the beginning of the line
      if first ~= nil and first ~= 1 then
          return true, {first = first}
      end
  end

  return false, {}
end

-- When position == 0, we just want to know whether there is a close
-- parens character here; it is up to doit() to match it or not with
-- a file or a DUMMY entry in the stack.
--
-- When position > 0, we are looking into the future to determine
-- whether we should unwrap a long line. In this case, we may want
-- to say "no, we cannot handle this" even if there is a close
-- parens character here:
--
--  * The close parens may pair up with something in the stack. If
--    that is the case, we do not want to unwrap a line, as it is
--    an independent message, so we should return true.
--
--  * The close parens may not pair up with anything. If that is the
--    case, we cannot really know whether we should unwrap or not:
--    it may be the continuation of a previous unknown message or
--    the start of a new unknown message. However, it is unlikely
--    for an unknown message to start with a close parens character,
--    so it's probably better to unwrap and, therefore, we should
--    return false (i.e., lie). Note, however, that this case is
--    very unlikely to happen in practice with parens (it may happen
--    with square brackets): there is always something in the stack,
--    even if it is only the main tex file we are processing.
--
--  * The close parens may pair up with some open parens character
--    in a line between 0 and the current value of "position". If
--    that is the case, we want to unwrap, as they are probably part
--    of the same message, so we should return false (i.e., lie).
--
--  * A special case happens if the line we want to decide on whether
--    to unwrap or not is an "open file" message. If that is the case,
--    proceeding to unwrap as per the previous item is "wrong". Still,
--    this causes no harm: during processing of the "open file"
--    message, the close parens will be detected and postponed for
--    future processing.

-- Check whether the line at "position" (default: 0, the current line)
-- starts with a close parens character (see the long comment above).
--
-- For position == 0, the answer only depends on the character being
-- there. For position > 0, the answer is "yes" only if, after pairing
-- up the open/close parens characters found in the lines before
-- "position", this close parens pairs up with an entry in the
-- openFiles stack.
--
-- Returns true and { first = column } or false and an empty table.
function closeParensHandler:canDoit(position)
  if position == nil then position = 0 end
  local line = Lines:get(position)
  if line == nil then return false, {} end

  local first = string.find(line, self.pattern)
  if first == nil then return false, {} end

  -- Ok, there is a close parens character here. We are either at the
  -- current line or in the "future" (position > 0).

  if position == 0 then return true, {first = first} end

  -- If we are in the "future", we check for open/close parens characters
  -- in the lines between "present" and "future" trying to pair them up,
  -- either among themselves or with whatever is at the stack.
  local pending = openFiles:size()
  local unpaired = 0 -- open parens with no corresponding close parens

  -- This is "position -1", not "position". Why? Because the line pointed
  -- at by "position" starts at the close parens character, so there is
  -- nothing before it to check: If position > 0, this is only called if
  -- the close parens character really is at the beginning of the line
  for linenum = 0, position -1 do
      local text = Lines:get(linenum)
      local size = 0
      if text ~= nil then size = string.len(text) end

      local i = 1
      while i <= size do
          local j = string.find(text, self.openOrClosePat, i)
          if j == nil then break end

          -- Classify the character that was actually found at column j.
          -- Searching again for an open character starting at "i" is
          -- wrong: it would count a close character as an open one
          -- whenever some open character appears later in the line.
          if string.sub(text, j, j) == self.openChar then
              unpaired = unpaired +1
          elseif unpaired > 0 then
              unpaired = unpaired -1
          elseif pending > 0 then
              pending = pending -1
          end

          i = j +1
      end
  end

  if pending > 0 and unpaired == 0 then
      return true, {first = first} -- we pair up with something in the stack
  else
      return false, {} -- let's lie!
  end
end

-- Process a close parens character at the beginning of the current
-- line. Returns false if there is none; otherwise consumes it, pops the
-- top of the openFiles stack, and returns true. If the popped entry is
-- a real filename, a "close file" message is dispatched; otherwise, the
-- character is ordinary text and goes to the unrecognized buffer.
function closeParensHandler:doit()
  if not self:canDoit() then return false end

  local _, last, spaces = string.find(Lines.current, self.pattern)
  unrecognizedBuffer = unrecognizedBuffer .. (spaces or "")

  -- skip the spaces and the close parens character
  Lines:handledChars(last)

  local filename = openFiles:pop()
  local isRealFile = filename ~= nil and filename ~= "DUMMY"

  if isRealFile then
      flushUnrecognizedMessages()
      local msg = closeFileMessage:new()
      msg.severity = DEBUG
      msg.content = "Close file " .. filename
      dispatch(msg)
      -- the current file changed, so update the global "mute" flag
      mute = currentFileIsSilenced()
  else
      unrecognizedBuffer = unrecognizedBuffer .. self.closeChar
  end

  return true
end

-- Detects "[" characters, which may indicate the beginning of a shipout
openSquareBracketHandler = openCloseHandlerPrototype:new()

openSquareBracketHandler.name = 'openSquareBracketHandler'
openSquareBracketHandler.openChar = '['
openSquareBracketHandler.closeChar = ']'

-- optional spaces (captured) followed by the open square bracket;
-- the strict version only matches at the beginning of the line
openSquareBracketHandler.loosePattern =
        '(%s*)%' .. openSquareBracketHandler.openChar
openSquareBracketHandler.strictPattern =
        '^' .. openSquareBracketHandler.loosePattern

-- Check whether the line at "position" (default: 0, the current line)
-- matches self.pattern, i.e., has an open square bracket character.
--
-- On success, returns true and a table with the column where the match
-- starts ("first") and the page guessed by guessShipoutPage()
-- ("latexPage", which may be nil). Otherwise, returns false and an
-- empty table.
function openSquareBracketHandler:canDoit(position)
  if position == nil then position = 0 end

  local line = Lines:get(position)
  if line == nil then return false, {} end

  local first, last = string.find(line, self.pattern)
  if first == nil then return false, {} end

  -- whatever comes after the open square bracket character
  local rest = string.sub(line, last +1)

  local latexPage = guessShipoutPage(position)

  -- See the comment "HACK ALERT" in openParensHandler:canDoit()
  local lookingAhead = position > 0
  if lookingAhead and latexPage == nil
                  and string.find(rest, self.closePat) then
      return false, {}
  end

  return true, {first = first, latexPage = latexPage} -- may be nil
end

-- Process an open square bracket at the beginning of the current line.
-- Returns false if there is none; otherwise consumes it and returns true.
--
-- If canDoit() provided a page, the page text is consumed too, it is
-- pushed onto the shipouts stack, the shipout is registered in
-- latexPages, and a "begin shipout" message is dispatched. Otherwise,
-- the character is treated as ordinary text: it goes to the unrecognized
-- buffer and a "DUMMY" entry is pushed onto the stack to pair up with
-- the corresponding close square bracket.
function openSquareBracketHandler:doit()
  local myTurn, data = self:canDoit()
  if not myTurn then return false end

  -- the spaces before the bracket are not ours: pass them on
  local _, last, spaces = string.find(Lines.current, self.pattern)
  unrecognizedBuffer = unrecognizedBuffer .. spaces

  -- skip the spaces and the open square bracket character
  Lines:handledChars(last)

  if data.latexPage ~= nil then
      flushUnrecognizedMessages()
      -- the page text may be wrapped over more than one line
      local last = unwrapUntilStringMatches(data.latexPage)
      if last == nil then
          io.stderr:write("    texlogsieve: parsing error near input line "
                               .. Lines.linenum
                               .. " (" .. self.name .. ":doit)\n")

          PARSE_ERROR = true
      else
          Lines:handledChars(last)
      end
      -- a "DUMMY" entry at the top of the stack is
      -- discarded before the real shipout is pushed
      if shipouts:peek() == "DUMMY" then shipouts:pop() end
      shipouts:push(data.latexPage)
      -- numShipouts counts the shipouts seen so far and is used as the
      -- index of this page in latexPages and as msg.physicalPage
      numShipouts = numShipouts +1
      table.insert(latexPages, numShipouts, data.latexPage)
      local msg = beginShipoutMessage:new()
      msg.physicalPage = numShipouts
      dispatch(msg)
  else
      shipouts:push("DUMMY")
      unrecognizedBuffer = unrecognizedBuffer .. self.openChar
  end

  return true
end

-- Detects "]" characters, which may indicate the end of a shipout
closeSquareBracketHandler = openCloseHandlerPrototype:new()

closeSquareBracketHandler.name = 'closeSquareBracketHandler'
closeSquareBracketHandler.openChar = '['
closeSquareBracketHandler.closeChar = ']'

-- optional spaces (captured) followed by the close square bracket;
-- the strict version only matches at the beginning of the line
closeSquareBracketHandler.loosePattern =
        '(%s*)%' .. closeSquareBracketHandler.closeChar
closeSquareBracketHandler.strictPattern =
        '^' .. closeSquareBracketHandler.loosePattern

-- Read the comment right before "closeParensHandler:lookahead()"
--
-- Returns true and { first = column } if the current line has a close
-- square bracket (possibly preceded by spaces) somewhere other than at
-- its very beginning; otherwise, returns false and an empty table.
function closeSquareBracketHandler:lookahead()
  local line = Lines:get(0)

  if line ~= nil then
      local first = string.find(line, self.loosePattern)
      -- Only return true if whatever matches
      -- is not at the beginning of the line
      if first ~= nil and first ~= 1 then
          return true, {first = first}
      end
  end

  return false, {}
end

-- Read the comments right before and inside "closeParensHandler:canDoit()"
--
-- Check whether the line at "position" (default: 0, the current line)
-- starts with a close square bracket. For position == 0, the answer
-- only depends on the character being there. For position > 0, the
-- answer is "yes" only if, after pairing up the open/close square
-- brackets found in the lines before "position", this close bracket
-- pairs up with an entry in the shipouts stack.
--
-- Returns true and { first = column } or false and an empty table.
function closeSquareBracketHandler:canDoit(position)
  if position == nil then position = 0 end
  local line = Lines:get(position)
  if line == nil then return false, {} end

  local first = string.find(line, self.pattern)
  if first == nil then return false, {} end

  if position == 0 then return true, {first = first} end

  local pending = shipouts:size()
  local unpaired = 0 -- open brackets with no corresponding close bracket

  -- The line at "position" itself starts with the close
  -- bracket, so only the lines before it need to be examined
  for linenum = 0, position -1 do
      local text = Lines:get(linenum)
      local size = 0
      if text ~= nil then size = string.len(text) end

      local i = 1
      while i <= size do
          local j = string.find(text, self.openOrClosePat, i)
          if j == nil then break end

          -- Classify the character that was actually found at column j.
          -- Searching again for an open character starting at "i" is
          -- wrong: it would count a close character as an open one
          -- whenever some open character appears later in the line.
          if string.sub(text, j, j) == self.openChar then
              unpaired = unpaired +1
          elseif unpaired > 0 then
              unpaired = unpaired -1
          elseif pending > 0 then
              pending = pending -1
          end

          i = j +1
      end
  end

  if pending > 0 and unpaired == 0 then
      return true, {first = first} -- we pair up with something in the stack
  else
      return false, {} -- let's lie!
  end
end

-- Process a close square bracket at the beginning of the current line.
-- Returns false if there is none; otherwise consumes it, pops the top
-- of the shipouts stack, and returns true. If the popped entry is a
-- real page, an "end shipout" message is dispatched; otherwise, the
-- character is ordinary text and goes to the unrecognized buffer.
function closeSquareBracketHandler:doit()
  if not self:canDoit() then return false end

  local _, last, spaces = string.find(Lines.current, self.pattern)
  unrecognizedBuffer = unrecognizedBuffer .. spaces

  -- skip the spaces and the close square bracket character
  Lines:handledChars(last)

  local latexPage = shipouts:pop()
  local isRealShipout = latexPage ~= nil and latexPage ~= "DUMMY"

  if isRealShipout then
      flushUnrecognizedMessages()
      local msg = endShipoutMessage:new()
      msg.physicalPage = numShipouts
      dispatch(msg)
  else
      unrecognizedBuffer = unrecognizedBuffer .. self.closeChar
  end

  return true
end

-- During a shipout and in the epilogue (maybe at some other times too?),
-- TeX sometimes puts a filename inside a pair of "{}" or "<>" characters.
extraFilesHandler = HandlerPrototype:new()

-- anchored at the beginning of the line (used by canDoit/doit)
extraFilesHandler.strictPatterns = { '^%s*%b<>', '^%s*%b{}' }

-- may match anywhere in the line (used by lookahead)
extraFilesHandler.loosePatterns = { '%s*%b<>', '%s*%b{}' }

-- the active set of patterns
extraFilesHandler.patterns = extraFilesHandler.strictPatterns

-- Read the comment right before "closeParensHandler:lookahead()".
-- That does not work here because we may mess up messages that present
-- content from the document, which may include "{}" or "<>" characters.
-- An example is the "pdfTeX warning (ext4):..." message. The solution
-- is to have lookahead() answer "yes" ONLY if there really is a filename
-- within the "{}" or "<>" pair.

-- Just like canDoit(), but with the unanchored patterns. A match that
-- starts at the first column is reported as "no match" (false and an
-- empty table); otherwise, returns whatever canDoit() returned.
function extraFilesHandler:lookahead()
  -- temporarily switch to the loose patterns
  extraFilesHandler.patterns = extraFilesHandler.loosePatterns
  local found, info = self:canDoit()
  extraFilesHandler.patterns = extraFilesHandler.strictPatterns

  -- Only return true if whatever matches
  -- is not at the beginning of the line
  local atLineStart = found and info.first == 1
  if atLineStart then return false, {} end

  return found, info
end

-- Check whether the line at "position" (default: 0, the current line)
-- matches one of self.patterns, i.e., has a "<...>" or "{...}" pair,
-- AND the text inside the pair is the name of an existing file. If the
-- line does not match by itself, tryUnwrapping() gets a chance too.
--
-- On success, returns true and a table with the column where the match
-- starts ("first") and the matched text, including the delimiters but
-- without leading spaces ("name"). Otherwise, returns false and an
-- empty table.
function extraFilesHandler:canDoit(position)
  if position == nil then position = 0 end
  local line = Lines:get(position)
  if line == nil then return false, {} end

  local first, match
  for _, pat in ipairs(self.patterns) do
      local from, to = string.find(line, pat)
      if from then
          first = from
          match = string.sub(line, from, to)
          break
      end
  end

  if first == nil then
      local ok, data = self:tryUnwrapping(position)
      if ok then
          first = data.first
          match = data.match
      end
  end

  if first == nil then return false, {} end

  -- Drop the leading spaces. This always matches (maybe with zero
  -- spaces); the results are kept in locals so that we do not write
  -- to the global "_" as a side effect.
  local _, spacesEnd = string.find(match, '^%s*')
  if spacesEnd then match = string.sub(match, spacesEnd +1) end

  -- strip the delimiters to get the candidate filename
  if checkIfFileExists(string.sub(match, 2, -2)) then
      return true, {first = first, name = match}
  end
  return false, {}
end

-- Process a "<filename>" or "{filename}" item at the beginning of the
-- current line. Returns false if there is none; otherwise consumes it
-- (together with any spaces that precede it), dispatches an
-- extraFilesMessage with the matched text, and returns true.
function extraFilesHandler:doit()
  local myTurn, data = self:canDoit()
  if not myTurn then return false end

  -- "^" only anchors the match when it is the very first character of
  -- the pattern. The previous pattern, "(^%s+)", looked for a literal
  -- "^" character followed by spaces anywhere in the line, so leading
  -- spaces were never detected (and a stray "^ " in the middle of the
  -- line could cause part of the line to be discarded).
  local _, last, spaces = string.find(Lines.current, "^(%s+)")
  if last then
      unrecognizedBuffer = unrecognizedBuffer .. spaces
      Lines:handledChars(last)
      flushUnrecognizedMessages()
  end

  -- Only the first result is needed; do not assign
  -- the others to the global "_"
  last = unwrapUntilStringMatches(data.name) -- this should never fail

  local msg = extraFilesMessage:new()
  msg.content = data.name
  Lines:handledChars(last)
  dispatch(msg)

  return true
end


-------------------------------------------------------------------------------
-- utf8FontMapHandler
--
-- This handles the encoding-related multi-line messages generated by
-- inputenc/fontenc with pdftex and utf8, similar to:
--
--   Now handling font encoding LS1 ...
--   ... no UTF-8 mapping file for font encoding LS1
--
-- or
--
--   Now handling font encoding LY1 ...
--   ... processing UTF-8 mapping file for font encoding LY1
--      defining Unicode char U+00A0 (decimal 160)
--
-- We could handle these by inheriting from genericLatexHandler, which
-- might be a good idea as this handler is quite complex. Still, the
-- advantage of this handler is that it groups the first lines (that
-- indicate the encoding) and the continuation lines as a unit, even
-- though there are other messages in between that are handled by other
-- handlers. We use two strategies to deal with continuation lines to
-- accomplish this:
--
--  1. After the first line, we change the self.doit method and define
--     ourselves as nextHandler, because we know the next line "belongs"
--     to us (nothing new here, genericLatexHandler does this too)
--
--  2. After the second line, we do not know whether the next line "belongs"
--     to us (actually, it most likely does not). So, we just change the
--     self.doit method and wait to be called by chooseHandler() again a
--     few lines down the road. When one of the patterns we are looking for
--     matches, we are back in business. This is why handleOtherLines must
--     check for the base "Now handling font encoding" pattern too: there
--     might not be continuation lines and we need to handle a new message
--     instead at some other point in the input file.
-------------------------------------------------------------------------------

utf8FontMapHandler = HandlerPrototype:new()

-- Check whether the line at "position" (default: 0, the current line)
-- is the kind of line we are currently waiting for, which depends on
-- the state of the handler (i.e., on which method self.doit refers to).
--
-- Returns false and an empty table if it is not. Otherwise, returns
-- true and a table whose "encoding" field holds the font encoding name
-- (only set when we are waiting for the first line).
--
-- we repeat here the tests we make on the other methods of this
-- object, which is somewhat dirty.
function utf8FontMapHandler:canDoit(position)
  if position == nil then position = 0 end
  local line = Lines:get(position)
  if line == nil then return false, {} end

  local first, encoding
  if self.doit == self.handleFirstLine then
      -- keep the unused result local instead of writing to the global "_"
      local _
      first, _, encoding = string.find(line,
              "^Now handling font encoding (%S+) %.%.%.")
  elseif self.doit == self.handleSecondLine then
      first = string.find(line,
              "^%.%.%. no UTF%-8 mapping file for font encoding")

      if first == nil then
          -- The subject string ("line") was missing from this call,
          -- which made string.find() raise an error instead of
          -- checking the second possible form of the line
          first = string.find(line,
                  "^%.%.%. processing UTF%-8 mapping file for font encoding")
      end
  else
      first = string.find(line, "^%s*defining Unicode char")
  end

  if first == nil then
      return false, {}
  else
      return true, {encoding = encoding}
  end
end

-- Initial state of the handler: deal with a "Now handling font encoding
-- ENC ..." line. If canDoit() rejects the current line, return false.
-- Otherwise, flush pending unrecognized messages, start a new DEBUG
-- message holding the current line and the encoding name, mark the line
-- as handled, switch self.doit to handleSecondLine, register ourselves
-- as nextHandler (we know the next line "belongs" to us) and return true.
function utf8FontMapHandler:handleFirstLine()
  local myTurn, data = self:canDoit()
  if not myTurn then return false end

  flushUnrecognizedMessages()
  self.message = Message:new()
  self.message.severity = DEBUG
  self.message.content = Lines.current
  self.message.encoding = data.encoding
  Lines:handledChars()
  self.encoding = data.encoding
  -- the next call to self:doit() processes the second line of the message
  self.doit = self.handleSecondLine
  nextHandler = self
  return true
end

-- The handler starts out waiting for the first line of a message
utf8FontMapHandler.doit = utf8FontMapHandler.handleFirstLine

-- Second state of the handler: the line right after "Now handling font
-- encoding". Three outcomes, all of which return true:
--
--  * "... no UTF-8 mapping file": the message is complete; dispatch it
--    and go back to waiting for a first line.
--
--  * "... processing UTF-8 mapping file": append the line and move on to
--    handleOtherLines (resetting its counters), without dispatching yet.
--
--  * anything else: dispatch what we have, report a parsing error on
--    stderr, set PARSE_ERROR and go back to waiting for a first line.
function utf8FontMapHandler:handleSecondLine()
  if string.find(Lines.current,
                  "^%.%.%. no UTF%-8 mapping file for font encoding") then

      self.message.content = self.message.content .. '\n' .. Lines.current
      Lines:handledChars()
      dispatch(self.message)
      self.message = nil
      self.doit = self.handleFirstLine
      return true
  end

  if string.find(Lines.current,
            "^%.%.%. processing UTF%-8 mapping file for font encoding") then

      self.message.content = self.message.content .. '\n' .. Lines.current
      Lines:handledChars()
      -- we do not set nextHandler here: the continuation lines, if any,
      -- only show up a few lines later (see handleOtherLines)
      self.doit = self.handleOtherLines
      self.numTries = 0
      self.foundOtherLines = false
      return true
  end

  -- The second line was neither "no UTF-8 mapping..." nor
  -- "processing UTF-8 mapping" - this should never happen
  dispatch(self.message)
  self.message = nil
  io.stderr:write("    texlogsieve: parsing error near input line "
                       .. Lines.linenum
                       .. " (utf8FontMapHandler:handleSecondLine)\n")

  PARSE_ERROR = true
  Lines:handledChars()
  self.doit = self.handleFirstLine
  return true
end

-- Between handleSecondLine and handleOtherLines there usually are two
-- messages: "open file ENCenc.dfu" and "ProvidesFile ENCenc.dfu".
-- Therefore, we try to find the first "...defining Unicode char"
-- message for the following 4 lines before giving up.
--
-- State kept between calls:
--  * self.numTries: how many non-matching lines we have let go by while
--    still waiting for the first "...defining Unicode char" line
--  * self.foundOtherLines: whether at least one such line was appended
--
-- Returns true when we handled the line or wrapped up the message, and
-- false (plus an empty table) when we want to be tried again later.
function utf8FontMapHandler:handleOtherLines()
  if string.find(Lines.current, "^%s*defining Unicode char") then
      flushUnrecognizedMessages()
      self.foundOtherLines = true
      self.message.content = self.message.content .. '\n' .. Lines.current
      Lines:handledChars()
      -- more "...defining Unicode char" lines may follow immediately
      nextHandler = self
      return true
  end

  -- this line does not match; why? First possibility: there are no
  -- "...defining Unicode char" lines to be found; instead, there is
  -- another encoding being defined. This obviously should not happen

  if string.find(Lines.current,
                   "^Now handling font encoding (%S+) %.%.%.") then
      -- give up and start processing the new message (should not happen)
      dispatch(self.message)
      self.message = nil
      flushUnrecognizedMessages()
      io.stderr:write("    texlogsieve: parsing error near input line "
                           .. Lines.linenum
                           .. " (utf8FontMapHandler:handleOtherLines)\n")

      PARSE_ERROR = true
      self.numTries = 0
      self.foundOtherLines = false
      self.doit = self.handleFirstLine
      return self:handleFirstLine()
  end

  -- second possibility: we have not yet reached the first "...defining
  -- Unicode char" message, but we want to try again in the next line
  if not self.foundOtherLines and self.numTries < 4 then
      self.numTries = self.numTries +1
      return false, {} -- we will try again later; for now, call chooseHandler()
  end

  -- third possibility: we never reached any "...defining Unicode char" line
  -- but we already tried for too many lines (more than 4), so give up and
  -- do nothing (we might output a "parsing error" message, but that is
  -- probably unnecessary)

  -- fourth possibility: we already found all lines, so
  -- we are done and there is no need to do anything else

  -- wrap up for the third and fourth possibilities
  -- NOTE(review): the current line is not marked with Lines:handledChars()
  -- here even though we return true - confirm the caller copes with that
  dispatch(self.message)
  self.message = nil
  self.doit = self.handleFirstLine
  self.numTries = 0
  self.foundOtherLines = false
  return true
end


--[[ ##################################################################### ]]--
--[[ ############################# MESSAGES ############################## ]]--
--[[ ##################################################################### ]]--

-- Base "class" for everything we may output. Instances look up missing
-- fields in the table they were created from, so "subclasses" are
-- simply created with Message:new() and then customized.
Message = {}

-- Create a new object that inherits from "self". The object records the
-- current value of the global "mute", starts with empty content/prefix
-- and, if there is an open file, remembers the one on top of openFiles.
function Message:new()
  self.__index = self
  local instance = setmetatable({}, self)

  instance.mute = mute
  instance.content = ""
  instance.prefix = ""

  if not (openFiles == nil or openFiles:empty()) then
      instance.filename = openFiles:peek()
  end

  return instance
end

Message.severity = UNKNOWN

-- Return the text to be shown for this message, or "" if the message
-- is filtered out. With bypassMostFilters (used by summaries), only
-- the SILENCE_STRINGS filter and the "empty text" check are applied
-- and a non-empty result is not cached; otherwise, the result is
-- cached in self.formatted and reused on later calls.
function Message:toString(bypassMostFilters)

    if not bypassMostFilters then

        -- If we've already been here, just output the previous result
        if self.formatted ~= nil then return self.formatted end

        -- the order matters: ignoreAsRedundant() is only
        -- consulted if the cheaper tests did not apply
        local suppressed = self.mute
                           or self.severity < MINLEVEL
                           or self:ignoreAsRedundant()

        if not suppressed and self.name ~= nil then
            for _, pkgname in ipairs(SILENCE_PKGS) do
                if self.name == pkgname then
                    suppressed = true
                    break
                end
            end
        end

        if suppressed then
            self.formatted = ""
            return ""
        end

    end

    local text = self:realToString()
    local silenced = trim(text) == ""

    if not silenced then
        for _, pattern in ipairs(SILENCE_STRINGS) do
            if string.find(text, pattern)
                    or string.find(self.content, pattern)
            then
                silenced = true
                break
            end
        end
    end

    if silenced then
        self.formatted = ""
        return ""
    end

    if not bypassMostFilters then self.formatted = text end

    return text
end

-- Build the text of the message without applying any filters. With no
-- prefix, this is just the content; otherwise, every line after the
-- first one is preceded by the prefix (with its pattern escapes removed).
function Message:realToString()
  if self.prefix == "" then return self.content end

  local pieces = linesToTable(self.content)
  local result = pieces[1]
  for i = 2, #pieces do
      result = result .. '\n' .. unprotect_metachars(self.prefix) .. pieces[i]
  end
  return result
end

-- Messages that only repeat what the summaries already report. Each
-- entry is {severity, package name, Lua pattern for the message text},
-- in the format expected by Message:checkMatch().
Message.redundantMessages = {
  { WARNING, 'LaTeX', 'There were undefined references%.' },
  { WARNING, 'LaTeX', 'There were multiply%-defined labels%.' },
}

-- Messages telling the user that LaTeX needs to be run again. Each
-- entry is {severity, package name, Lua pattern for the message text},
-- in the format expected by Message:checkMatch().
--
-- The third field is a Lua *pattern*, so magic characters that should
-- be taken literally must be escaped with "%". In particular, a bare
-- "-" after a character is a (lazy) repetition operator, not a hyphen:
-- 'cross-references' does NOT match the text "cross-references".
Message.rerunMessages = {
  {
    WARNING,
    'LaTeX',
    'Label%(s%) may have changed%. Rerun to get cross%-references right%.'
  },
  {
    WARNING,
    'LaTeX',
    'Temporary extra page added at the end%. Rerun to get it removed%.'
  },
  {
    WARNING,
    'longtable',
    'Table %S+s have changed%. Rerun LaTeX%.'
  },
  {
    WARNING,
    'longtable',
    'Column %S+s have changed\nin table'
  },
  {
    WARNING,
    'rerunfilecheck',
    "File %b`' has changed%."
  },
  {
    WARNING,
    'biblatex',
    'Please rerun LaTeX%.'
  },
  {
    WARNING,
    'biblatex',
    'Please %(re%)run Biber on the file:'
  },
  {
    WARNING,
    'atenddvi',
    'Rerun LaTeX, last page not yet found%.'
  },
  {
    WARNING,
    'hyperref',
    'Rerun to get /PageLabels entry%.'
  },
  {
    WARNING,
    'hyperref',
    "Rerun to get outlines right\nor use package `bookmark'",
  },
  {
    WARNING,
    'backref',
    'Rerun to get bibliographical references right',
  },
  {
    WARNING,
    'pbalance',
    'Last two%-column page cols not balanced%. Rerun LaTeX%.',
  },
  {
    WARNING,
    'simurgh',
    'unjustified poem%. rerun lualatex to get poem right%.',
  },
  {
    WARNING,
    'bidipoem',
    'Unjustified poem%. Rerun XeLaTeX to get poem right',
  },
  {
    WARNING,
    'biblatex',
    'Page references have changed%.\nRerun to get references right%.',
  },
  {
    WARNING,
    'bidi-perpage',
    "Counter %b`' may not have been reset per page%. Rerun to reset counter %b`' per page%.",
  },
  {
    WARNING,
    'Changebar',
    'Changebar info has changed%.\nRerun to get the bars right',
  },
  {
    WARNING,
    'thumbs',
    '\\th@mbmaxwidtha undefined%.\nRerun to get the thumb marks width right%.',
  },
  {
    WARNING,
    'thumbs',
    'Rerun to get the thumb marks width right%.',
  },
  {
    WARNING,
    'thumbs',
    'Warning: Rerun to get list of thumbs right!',
  },
  {
    WARNING,
    'thumbs',
    'File .-%.tmb not found%.\nRerun to get thumbs overview page%(s%) right%.',
  },
  {
    WARNING,
    'xdoc2',
    'Page breaks may have changed%.\nRerun to get marginal material right%.',
  },
  {
    WARNING,
    'bibtopic',
    'Rerun to get indentation of bibitems right%.',
  },
  {
    WARNING,
    'polytable',
    'Column widths have changed%. Rerun LaTeX%.',
  },
  {
    WARNING,
    'mpostinl',
    'figure%(s%) may have changed%. Rerun to update figures',
  },
  {
    WARNING,
    'uftag',
    'label .- unknown %- rerun',
  },
  {
    WARNING,
    'uftag',
    'structure with label .- is unknown rerun',
  },
  {
    WARNING,
    'LaTeX', -- actually, mciteplus
    'Rerun to ensure correct mciteplus label max width/count',
  },
  {
    WARNING,
    'LaTeX', -- actually, mciteplus
    'Mciteplus max count has changed',
  },
  {
    WARNING,
    'LaTeX', -- actually, mciteplus
    'Mciteplus max width has changed',
  },
  {
    WARNING,
    'LaTeX', -- actually, upmethodology
    'Project Task%(s%) may have changed%.\nRerun to get cross%-references right',
  },
  {
    WARNING,
    'media9',
    'Rerun to get internal references right!',
  },
  {
    WARNING,
    'pdfbase',
    'Rerun to get internal references right!',
  },
  {
    WARNING,
    'revtex4',
    'Endnote numbers changed: rerun LaTeX',
  },
  {
    WARNING,
    'natbib',
    'Citation%(s%) may have changed%.\nRerun to get citations correct',
  },
  {
    WARNING,
    'gridset',
    'Grid position labels may have changed%.\nRerun to get grid positions right%.',
  },
  {
    WARNING,
    'datagidx',
    'Rerun required to sort the index/glossary databases',
  },
  {
    WARNING,
    'datagidx',
    'Rerun required to ensure the index/glossary location lists are up%-to%-date',
  },
  {
    WARNING,
    'exframe',
    'points changed for .- .-; rerun to fix',
  },
  {
    WARNING,
    'LaTeX', -- actually, acro (presumably, like the entries below)
    'Rerun to get page numbers of acronym .- in acronym list right',
  },
  {
    WARNING,
    'LaTeX', -- actually, acro
    'Acronyms may have changed. Please rerun LaTeX',
  },
  {
    WARNING,
    'LaTeX', -- actually, acro
    'Rerun to get barriers of acronym .- right',
  },
  {
    WARNING,
    'LaTeX', -- actually, acro
    'Rerun to get acronym list right',
  },
  {
    WARNING,
    'changes',
    'LaTeX rerun needed for list of changes',
  },
  {
    WARNING,
    'changes',
    'LaTeX rerun needed for summary of changes',
  },
  {
    WARNING,
    'totcount',
    'Rerun to get correct total counts',
  },
  {
    WARNING,
    'longfigure',
    '.- .-s have changed%. Rerun %S-%.',
  },
  {
    WARNING,
    'knowledge',
    "The label '[^']+' could not be found while searching for '[^']+'%. Possibly rerun latex%.",
  },
  {
    WARNING,
    'scope',
    "Unknown label %b`'%. Possibly rerun latex%.",
  },
  {
    WARNING,
    'scope',
    "The area %b`' can't be found in context%. Possibly rerun latex%.",
  },
  {
    WARNING,
    'LaTeX', -- actually, constants
    'Label%(s%) for constants may have changed%. Rerun to get cross%-references right',
  },
  {
    WARNING,
    'keyvaltable',
    "No row data available for name '[^']+'%. A LaTeX rerun might be needed\nfor the row data to be available",
  },
  {
    WARNING,
    'totalcount',
    'Total counter%(s%) have changed%.\nRerun to get them right',
  },
  {
    WARNING,
    'caption',
    "%b`' support has been changed%.\nRerun to get captions right",
  },
  {
    WARNING,
    'LaTeX', -- actually, enotez
    'Endnotes may have changed%. Rerun to get them right%.',
  },
  {
    WARNING,
    'autobreak',
    'Layout may have changed%.\nRerun to get layout correct',
  },
  {
    WARNING,
    'LaTeX', -- actually, eqparbox
    "Rerun to correct the width of eqparbox %b`'",
  },
  {
    WARNING,
    'LaTeX', -- actually, eqparbox
    "Rerun to correct eqparbox widths",
  },
  {
    WARNING,
    'lineno',
    'Linenumber reference failed, rerun to get it right',
  },
  {
    WARNING,
    'xsavebox',
    'Rerun to get internal references right!',
  },
  {
    WARNING,
    'notespage',
    'New notes pages were added%. Please rerun LaTeX to get header marks right%.',
  },
  {
    WARNING,
    'linegoal',
    '\\linegoal value on page .- has changed\n since last run%. Please rerun to get\nthe correct value',
  },
  {
    WARNING,
    'pgfgantt',
    'Gantt chart expansion may have changed%. Rerun to get expansion right',
  },
  {
    WARNING,
    'lastpage',
    'Rerun to get the references right',
  },
  {
    WARNING,
    'animate',
    '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n@ Rerun to get internal references right! @\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@',
  },
  {
    WARNING,
    'notes2bib',
    'To get notes in the correct order, please run:\n 1%) LaTeX\n 2%) BibTeX\n 3%) LaTeX',
  },
  {
    WARNING,
    'xsim',
    'Exercise properties may have changed%. Rerun to get them synchronized%.',
  },
  {
    WARNING,
    'backcite',
    'Backcite%(s%) may have changed%.\nRerun to get back references right',
  },
  {
    WARNING,
    'hyper',
    'Hyper anchor%(s%) may have changed%.\nRerun to get hyper references right',
  },
  {
    WARNING,
    'footbib',
    'Bibliography not yet stable%. Rerun LaTeX',
  },
  {
    WARNING,
    'glossaries',
    'Navigation panel has changed%. Rerun LaTeX',
  },
  {
    WARNING,
    'fancylabel',
    'Fancylabels may have changed%. Please run latex again%.',
  },
  {
    WARNING,
    'papermas',
    'Number of pages may have changed%.\nRerun to get it right',
  },
  {
    WARNING,
    'standalone',
    -- the hyphen must be escaped, otherwise "b-" means "zero or more b's"
    "Sub%-preamble of file '[^']+' has changed%. Content will be ignored%. Please rerun LaTeX!",
  },
  {
    WARNING,
    'movie15',
    '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n@@ Rerun to get object references right! @@\n@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@',
  },
  {
    WARNING,
    'atenddvi',
    'Rerun LaTeX, last page not yet found',
  },
  {
    WARNING,
    'atenddvi',
    'Rerun LaTeX, last page has changed',
  },
  {
    WARNING,
    'transparent',
    'Rerun to get transparencies right',
  },
  {
    WARNING,
    'accessibility',
    'Changed paragraphs, rerun to get it right',
  },
  {
    WARNING,
    'exam',
    'Point totals have changed%. Rerun to get point totals right',
  },
  {
    WARNING,
    'scrlayer-notecolumn',
    'MD5 of slnc%-file changed%.\nLast: .-\nNew: .-\nAt least one more LaTeX rerun needed to\nmake note columns right',
  },
--[[
-- why are these "INFO"? Maybe they appear even when no rerun is needed?
  {
    INFO,
    'scrlayer-notecolumn',
    'No text label for \\syncwithnotecolumn%.\nNeed rerun to sync position',
  },
  {
    INFO,
    'scrlayer-notecolumn',
    "No note label for sync with note column\n%b`'%.\nNeed rerun to sync position",
  },
  {
    INFO,
    'scrlayer-notecolumn',
    'Last note page before last text page%.\nNeed rerun to sync position',
  },
  {
    INFO,
    'scrlayer-notecolumn',
    'Last note position before last text\nposition%.\nNeed rerun to sync position',
  },
--]]
  {
    WARNING,
    'LaTeX', -- actually, vhistory
    'Rerun LaTeX to get the history of versions%.',
  },
  {
    WARNING,
    'bgteubner',
    'Indentation for theorem titles has been ..creased%.\nRerun to adjust the indentation',
  },
  {
    WARNING,
    'pageslts',
    -- the hyphen must be escaped, otherwise "s-" means "zero or more s's"
    'Label%(s%) may have changed%.\nRerun to get cross%-references right',
  },
  {
    WARNING,
    'limap',
    'Table .-s have changed%. Rerun LaTeX%.',
  },
  {
    WARNING,
    'uniq',
    "%b`' is unique now%.\nRerun LaTeX to pick up the change",
  },
  {
    WARNING,
    'uniq',
    "%b`' is not unique anymore%.\nRerun LaTeX to pick up the change",
  },
  {
    WARNING,
    'uniq',
    'Uniquenesses have changed.%s?\nRerun LaTeX to pick up the change',
  },
  {
    WARNING,
    'bibunits',
    'Label%(s%) may have changed%. Rerun to get cross%-references right',
  },
  {
    WARNING,
    'ocgx2',
    'Rerun to get OCG references right!',
  },
  {
    WARNING,
    'glossaries',
    "Missing pre%-location tag for %b`'%. Rerun required",
  },
  {
    WARNING,
    'mparhack',
    'Marginpars may have changed%.\nRerun to get them right',
  },
  {
    WARNING,
    'LaTeX', -- actually, etaremune
    'Etaremune labels have changed%.\n%s+Rerun to get them right',
  },
}

-- Check whether this message matches any entry of patlist. Each entry
-- is {severity, package name, pattern}; an entry matches if severity
-- and package name are equal to ours and the pattern is found either
-- in the formatted text or in the raw content of the message.
function Message:checkMatch(patlist)
  -- This code targets messages generated by genericLatexHandler.
  -- With it, messages generated by LaTeX do not carry the name
  -- of any package; in these cases, we use "LaTeX" instead.
  local name = self.name
  if name == nil then name = self.what end

  for _, entry in ipairs(patlist) do
      if self.severity == entry[1] and name == entry[2] then
          local pattern = entry[3]
          if string.find(self:realToString(), pattern)
                        or string.find(self.content, pattern)
          then
              return true
          end
      end
  end

  return false
end

-- Tell whether this message should be left out because the summaries
-- already convey the same information. Never true if BE_REDUNDANT is
-- set. The result of the pattern matching is cached in self.redundant.
function Message:ignoreAsRedundant()
  if BE_REDUNDANT then return false end

  if self.redundant == nil then
      -- "rerun" messages are redundant too
      self.redundant = self:checkMatch(self.redundantMessages)
                       or self:checkMatch(self.rerunMessages)
  end

  return self.redundant
end

-- Register this message with repetitionsSummary, unless it
-- produces no output or consists of a single bracket/parenthesis.
function Message:toSummary()
  local text = trim(self:toString())
  if text == "" then return end

  -- In the rare event that one of these is sent out as
  -- an unrecognizedMessage with no other text, allow for
  -- repetitions
  if string.find(text, '^[%(%)%[%]]$') then return end

  repetitionsSummary:add(self)
end


-- Return true if messages from the file currently being processed
-- should be silenced: either the basename of the file on top of
-- openFiles matches a pattern in SEMISILENCE_FILES, or the basename
-- of any file in openFiles matches a pattern in SILENCE_FILES_RECURSIVE.
function currentFileIsSilenced()
  if openFiles:empty() then return false end

  -- strip everything up to (and including) the last "/"
  local function basename(path)
      local _, last = string.find(path, '^.*/')
      if last == nil then return path end
      return string.sub(path, last +1)
  end

  local function matchesAny(name, patterns)
      for _, pattern in ipairs(patterns) do
          if string.find(name, pattern) then return true end
      end
      return false
  end

  if matchesAny(basename(openFiles:peek()), SEMISILENCE_FILES) then
      return true
  end

  -- This is O(n*m) and gets executed for every message,
  -- but "n" and "m" are unlikely to grow much beyond 10.
  for _, path in ipairs(openFiles) do
      if matchesAny(basename(path), SILENCE_FILES_RECURSIVE) then
          return true
      end
  end

  return false
end


-- Marks the beginning of a shipout. It only produces output when
-- both SHOW_SHIPOUTS and RAW are set: "[" followed by the LaTeX
-- page counter recorded for this physical page.
beginShipoutMessage = Message:new()
function beginShipoutMessage:realToString()
  if SHOW_SHIPOUTS and RAW then
      return '[' .. latexPages[self.physicalPage]
  end

  return ""
end


-- Marks the end of a shipout. Produces nothing unless SHOW_SHIPOUTS
-- is set; in RAW mode it is just "]", otherwise a line with the
-- physical page number and the LaTeX page counter.
endShipoutMessage = Message:new()
endShipoutMessage.shipout = true
function endShipoutMessage:realToString()
  if not SHOW_SHIPOUTS then return "" end

  if RAW then return ']' end

  -- print counter as [cnt], just as LaTeX does. With that, if a program is
  -- monitoring output, it can show some sort of progress bar to the user
  local physical = self.physicalPage
  local text = 'shipout - physical page ' .. physical
               .. ', LaTeX page counter ['
               .. latexPages[physical] .. ']'

  if COLOR then text = green(text) end

  return text
end


-- Message used when a file is opened.
openFileMessage = Message:new()

-- In RAW mode, output "(" followed by the file name;
-- otherwise, use the generic Message formatting.
function openFileMessage:realToString()
  if RAW then return "(" .. self.filename end

  return Message.realToString(self)
end

-- NOTE(review): this flag is consumed elsewhere in
-- the file; its exact meaning is not visible here
openFileMessage.alwaysEnds = true

-- We never want to suppress these repetitions
-- (so this deliberately does nothing)
function openFileMessage:toSummary()
end


-- Message used when a file is closed.
closeFileMessage = Message:new()

-- In RAW mode, output just ")"; otherwise,
-- use the generic Message formatting.
function closeFileMessage:realToString()
  if RAW then return ")" end

  return Message.realToString(self)
end

-- NOTE(review): this flag is consumed elsewhere in
-- the file; its exact meaning is not visible here
closeFileMessage.alwaysEnds = true

-- We never want to suppress these repetitions
-- (so this deliberately does nothing)
function closeFileMessage:toSummary()
end


-- Under/overfull box warnings. They are reported by underOverSummary,
-- so the individual messages are redundant unless BE_REDUNDANT is set.
underOverMessage = Message:new()
underOverMessage.severity = WARNING

function underOverMessage:ignoreAsRedundant()
  return not BE_REDUNDANT
end

-- The content, followed by the offending text (if any) and, in raw
-- mode only, by the closing text of the original message (if any).
function underOverMessage:realToString()
    local text = self.content
    local offending = self.failedText

    if offending ~= nil then
        if not RAW then text = text .. '\nOffending text:' end
        text = text .. '\n' .. offending .. '\n'
    end

    -- in raw mode, add the final "[]"
    if RAW and self.closing ~= nil then
        text = text .. ' ' .. self.closing .. '\n'
    end

    return text
end

function underOverMessage:toSummary()
    underOverSummary:add(self)
end


-- "Missing character" messages. They are reported by missingCharSummary,
-- so the individual messages are redundant unless BE_REDUNDANT is set.
missingCharMessage = Message:new()

function missingCharMessage:ignoreAsRedundant()
  return not BE_REDUNDANT
end

-- This is a hack: it would be too painful to define
-- pattern captures in the handler, so we do this here.
--
-- Besides returning the content unchanged, this extracts (once) the
-- character and the font from the content into self.char and self.font;
-- both remain nil if the content does not have the expected format.
function missingCharMessage:realToString()
  if self.char == nil then
      -- string.match() returns only the captures, so there are no
      -- unused results to discard (and no stray global "_" is set)
      self.char, self.font = string.match(self.content,
                        '^Missing character: There is no (.-) in font (.*)!')
  end

  return self.content
end

function missingCharMessage:toSummary()
  missingCharSummary:add(self)
end


-- Messages that are collected by citationsSummary. Base object
-- for referenceMessage and labelMessage as well.
citationMessage = Message:new()

-- The summary already reports these, so the individual
-- messages are redundant unless BE_REDUNDANT is set
function citationMessage:ignoreAsRedundant()
  return not BE_REDUNDANT
end

function citationMessage:toSummary()
  citationsSummary:add(self)
end

-- Same as citationMessage (including ignoreAsRedundant),
-- but collected by referencesSummary instead
referenceMessage = citationMessage:new()

function referenceMessage:toSummary()
  referencesSummary:add(self)
end

-- Same as citationMessage (including ignoreAsRedundant),
-- but collected by labelsSummary instead
labelMessage = citationMessage:new()

function labelMessage:toSummary()
  labelsSummary:add(self)
end

-- Collected by unusedLabelsSummary. Unlike citationMessage, this
-- inherits directly from Message and does not override ignoreAsRedundant.
unusedLabelMessage = Message:new()

function unusedLabelMessage:toSummary()
  unusedLabelsSummary:add(self)
end

-- DEBUG-level message for additional files being loaded. In RAW mode
-- the content is shown as is; otherwise, its first and last characters
-- are dropped and the rest is shown after "Loading file ".
extraFilesMessage = Message:new()
extraFilesMessage.severity = DEBUG

function extraFilesMessage:realToString()
  local raw = self.content
  if not RAW then
      return "Loading file " .. string.sub(raw, 2, -2)
  end
  return raw
end


--[[ ##################################################################### ]]--
--[[ ############################ SUMMARIES ############################## ]]--
--[[ ##################################################################### ]]--

-- A Summary handles a class of messages (for example, undefined citations).
-- Some of these messages may be repeated (such as one specific undefined
-- citation that appears multiple times). We want to mention each repeated
-- group as a single item ("citation blah undefined in pages X, Y, and Z").
-- Therefore, we make self.messages a list of lists: Each "sub-list" holds
-- the messages that correspond to a specific message content (such as a
-- specific undefined citation).
SummaryPrototype = {}

-- Should filtered out messages be included in the summary? For repetitions
-- this should be false, for most others true is probably better.
SummaryPrototype.bypassMostFilters = false

-- Create a new summary that inherits from "self", with its
-- own (empty) collection of messages and an empty header.
function SummaryPrototype:new()
  self.__index = self
  local summary = setmetatable({}, self)
  summary.messages = {}
  summary.header = ""
  return summary
end

-- Add msg to the summary, unless it produces no output. Messages
-- are grouped in sublists indexed by their formatted text or, when
-- available, by msg.terseContent.
function SummaryPrototype:add(msg)
  local key = msg:toString(self.bypassMostFilters)
  if trim(key) == "" then return end

  -- Now that we know whether the message is actually included in
  -- the output, let's check whether we should use terseContent to
  -- identify duplicates
  if msg.terseContent ~= nil then key = msg.terseContent end

  -- group messages by message content
  local group = self.messages[key]
  if group == nil then
      group = {}
      self.messages[key] = group
  end

  group[#group + 1] = msg
end

-- Tell whether a message equivalent to msg (same formatted text or,
-- when available, same terseContent) was already added to this summary.
-- Messages that produce no output are never considered "seen".
function SummaryPrototype:alreadySeen(msg)
  local text = msg:toString()

  if trim(text) ~= "" then
      -- Now that we know that the message should actually be included
      -- in the output, let's check whether we should use terseContent
      -- to identify duplicates
      local key = text
      if msg.terseContent ~= nil then key = msg.terseContent end

      return self.messages[key] ~= nil
  end

  return false
end

-- Whether this summary is shown in detailed form; toString() and
-- processAllMessages() use this to choose separators. The default
-- is true; we use this for --no-ref-detail and --no-cite-detail
-- (some summaries override it).
function SummaryPrototype:detailed()
  return true
end

-- Return the full text of the summary: the header (if any) followed
-- by the text for all messages, or "" if there is nothing to report.
-- The header is followed by a line break in detailed mode and by a
-- space otherwise, and is shown in green if COLOR is set.
function SummaryPrototype:toString()
  -- check if the table is empty - https://stackoverflow.com/a/1252776
  if next(self.messages) == nil then return "" end

  local text = self:processAllMessages()

  if text == "" then return "" end -- happens with repetitionsSummary

  -- Work on a local copy: modifying self.header here would add the
  -- color codes and the separator again every time we are called
  local header = self.header
  if header ~= "" then
      if COLOR then header = green(header) end
      if self:detailed() then
          header = header .. '\n'
      else
          header = header .. ' '
      end
  end

  return header .. text
end

-- as mentioned above, self.messages holds a list of lists, such as
-- all the "undefined reference blah" messages in one sublist and
-- all the "undefined reference bleh" messages in another; process
-- each sublist separately and join the non-empty results with a
-- line break (detailed mode) or ", " (otherwise). We sort because
-- otherwise the order of the reports changes with each execution,
-- which makes comparing outputs harder.
function SummaryPrototype:processAllMessages()
  local separator = ", "
  if self:detailed() then separator = '\n' end

  local parts = {}
  for _, sublist in pairsSortedByKeys(self.messages) do
      local piece = self:processSingleMessageList(sublist)
      if piece ~= "" then parts[#parts + 1] = piece end
  end

  local joined = table.concat(parts, separator)

  -- remove leading '\n' or ', '
  local _, last = string.find(joined, '^[\n, ]+')
  if last then joined = string.sub(joined, last +1) end
  return joined
end

-- This is where we process each individual sublist, generating
-- something like "undefined reference blah in pages X, Y, Z".
-- This obviously depends on the type of summary, so this default
-- implementation just returns an empty string (which
-- processAllMessages() skips); specific summaries override it.
function SummaryPrototype:processSingleMessageList(messages)
  return ""
end

-- This receives a list of (equal) messages and returns two strings: the
-- list of pages and the list of files where they were found. For messages
-- that the program "sees" as equal even if they include different line
-- numbers (such as undefined references), the line numbers are also shown,
-- grouped by input file. If there is a line number but not a file name
-- (this should not really happen), the line number is *not* shown; not
-- great, but fixing this is more trouble than it's worth.
function SummaryPrototype:pageAndFileList(messages)

    -- turn the keys of a Set into a sorted array
    local function sortedKeys(set)
        local keys = {}
        for key, _ in pairs(set) do -- not ipairs!
            keys[#keys + 1] = key
        end
        table.sort(keys)
        return keys
    end

    -- Use Sets for the page numbers and for the line
    -- numbers of each file to eliminate repetitions
    local pageSet = {}
    local linesByFile = {}
    for _, msg in ipairs(messages) do
        local page = msg.physicalPage
        if page ~= nil then pageSet[page] = true end

        local fname = msg.filename
        if fname ~= nil then
            if linesByFile[fname] == nil then linesByFile[fname] = {} end

            local linenum = tonumber(msg.linenum)
            if linenum then linesByFile[fname][linenum] = true end
        end
    end

    -- one entry per file: "name" or "name: lines X, Y, Z"
    local fileEntries = {}
    for fname, lineSet in pairs(linesByFile) do
        local entry = fname
        if next(lineSet) ~= nil then -- check if the table is empty
            entry = entry .. ": "
                    .. listToCommaSeparatedString(sortedKeys(lineSet),
                                                  "line", "lines")
        end
        fileEntries[#fileEntries + 1] = entry
    end
    table.sort(fileEntries)

    local pages = listToCommaSeparatedString(sortedKeys(pageSet),
                                             "page", "pages")
    local files = listToCommaSeparatedString(fileEntries,
                                             "file", "files", ';')

    return pages, files
end


-- Summary of messages that appeared more than once. Filters are not
-- bypassed here: messages that were filtered out are not counted.
repetitionsSummary = SummaryPrototype:new()
repetitionsSummary.bypassMostFilters = false
repetitionsSummary.header = 'Repeated messages:'

-- Only produce output if SILENCE_REPETITIONS is set
function repetitionsSummary:toString()
  if not SILENCE_REPETITIONS then return "" end

  return SummaryPrototype.toString(self)
end

-- Generate the report for one group of equal messages: the message
-- text followed by a line telling where it appeared (pages and/or
-- files, when known) and how many times. Groups with a single message
-- are not repetitions, so they yield an empty string.
function repetitionsSummary:processSingleMessageList(messages)
  if #messages <= 1 then return "" end

  local pages, files = self:pageAndFileList(messages)

  local where = ""
  if pages ~= "" then
      if files ~= "" then
          where = 'in ' .. pages .. ' (' .. files .. ') - '
      else
          where = 'in ' .. pages .. ' - '
      end
  elseif files ~= "" then
      where = 'in ' .. files .. ' - '
  end

  local content = messages[1]:toString()
  if trim(content) == "" then return "" end

  return content .. '\n'
         .. where
         .. #messages .. ' repetitions'
end


-- Summary of "Missing character" messages; filtered
-- out messages are included (bypassMostFilters).
missingCharSummary = SummaryPrototype:new()
missingCharSummary.bypassMostFilters = true
missingCharSummary.header = 'Missing characters:'

-- Generate "char C, font F in pages X, Y (files...)" for one group of
-- equal messages, with the "char C, font F" part in red if COLOR is set.
function missingCharSummary:processSingleMessageList(messages)
  local pages, files = self:pageAndFileList(messages)

  -- char and font are extracted from the message text by
  -- missingCharMessage:realToString() and stay nil if the text does
  -- not have the expected format; use a placeholder (as we do for
  -- empty citation keys) instead of failing on the concatenation
  local char = messages[1].char or '???'
  local font = messages[1].font or '???'

  local text = 'char ' .. char .. ', font ' .. font

  if COLOR then text = red(text) end

  text = text .. ' in ' .. pages .. " (" .. files .. ")"

  return text
end


-- Summary of undefined citations; filtered out messages are included
-- (bypassMostFilters). Also the base object for the references,
-- labels and unused labels summaries.
citationsSummary = SummaryPrototype:new()
citationsSummary.bypassMostFilters = true
citationsSummary.header = 'Undefined citations:'

-- Detailed output is controlled by DETAILED_CITATION_SUMMARY
function citationsSummary:detailed()
  return DETAILED_CITATION_SUMMARY
end

-- Add msg to the summary, grouped by msg.key (or by '???' if the
-- key is empty). Messages that produce no output are not added.
function citationsSummary:add(msg)
  -- Filter out stuff explicitly excluded by the user
  local shown = msg:toString(self.bypassMostFilters)
  if trim(shown) == "" then return end

  -- group messages by problem key. We do not use msg:toString()
  -- here because some messages may include the page number, making
  -- messages that are otherwise the same appear to be different.
  local key = msg.key
  if key == "" then key = '???' end

  local group = self.messages[key]
  if group == nil then
      group = {}
      self.messages[key] = group
  end

  group[#group + 1] = msg
end

-- Tell whether some message with the same key as msg was already
-- added. Uses the same key normalization as add(): an empty key
-- is stored as '???'. Unlike SummaryPrototype:alreadySeen(), this
-- does not check whether msg produces any output.
function citationsSummary:alreadySeen(msg)
  local key = msg.key
  if key == "" then key = '???' end

  return self.messages[key] ~= nil
end

-- Generate the report for one key: just the key in non-detailed mode,
-- or "key in pages X, Y (files...)" in detailed mode (with the key in
-- red if COLOR is set). Empty keys are shown as '???'.
function citationsSummary:processSingleMessageList(messages)
  local pages, files = self:pageAndFileList(messages)

  local key = messages[1].key
  if key == "" then key = '???' end

  if not self:detailed() then return key end

  if COLOR then key = red(key) end
  return key .. ' in ' .. pages .. " (" .. files .. ")"
end


-- Same as citationsSummary, but with its own header and with
-- the level of detail controlled by DETAILED_REFERENCE_SUMMARY
referencesSummary = citationsSummary:new()
referencesSummary.header = 'Undefined references:'

function referencesSummary:detailed()
  return DETAILED_REFERENCE_SUMMARY
end


-- Same as citationsSummary, but with its own
-- header and never shown in detailed form
labelsSummary = citationsSummary:new()
labelsSummary.header = 'Multiply defined labels:'

-- LaTeX does not supply details for these
function labelsSummary:detailed()
  return false
end


-- Same as citationsSummary (including detailed(), which
-- is not overridden here), but with its own header
unusedLabelsSummary = citationsSummary:new()
unusedLabelsSummary.header = 'Unused labels:'


-- This is a little different from the others; we do not want to
-- treat different messages differently, only report that there were
-- under/overfull boxes in pages X, Y, and Z. So we store messages
-- directly in self.messages (as a plain list) instead of using
-- sublists; filtered out messages are included (bypassMostFilters).
underOverSummary = SummaryPrototype:new()
underOverSummary.bypassMostFilters = true

function underOverSummary:add(msg)
  -- Leave out stuff explicitly excluded by the user
  local shown = msg:toString(self.bypassMostFilters)
  if trim(shown) ~= "" then
      self.messages[#self.messages + 1] = msg
  end
end

-- Produces the text of the under/overfull boxes summary. Returns ""
-- when no messages were stored. With DETAILED_UNDEROVER_SUMMARY, each
-- message is listed in full, preceded by its page and file; otherwise
-- a single line lists the pages and files involved.
function underOverSummary:toString()
  if #self.messages == 0 then return "" end

  local pages, files = self:pageAndFileList(self.messages)

  -- short form: a one-line report
  if not DETAILED_UNDEROVER_SUMMARY then
      local title = "Under/overfull boxes"
      if COLOR then title = green(title) end
      return title .. " in " .. pages .. " (" .. files .. ")"
  end

  -- long form: header followed by one entry per message
  local report = "Under/overfull boxes:"
  if COLOR then report = green(report) end

  for _, msg in ipairs(self.messages) do
      local where = 'page ' .. msg.physicalPage
      local source = 'file ' .. msg.filename
      if COLOR then where = bright(where) end

      report = report .. '\n' .. where .. ' (' .. source .. '):\n'
                      .. msg:toString(true)
  end

  return report
end


--[[ ##################################################################### ]]--
--[[ ########################## AUXILIARY STUFF ########################## ]]--
--[[ ##################################################################### ]]--

-- Escapes the characters that are special in lua patterns
-- ( % ( ) . + - * ? [ ^ $ ) by prefixing each of them with "%",
-- so that the result matches the original string literally.
function protect_metachars(s)
  -- A single pass is enough: every special character found in the
  -- input is replaced by "%" followed by itself ("%0" is the match).
  local escaped = string.gsub(s, "[%%%(%)%.%+%-%*%?%[%^%$]", "%%%0")
  return escaped
end

-- Reverts protect_metachars(): removes the "%" that precedes each
-- escaped special character and finally turns "%%" back into "%".
function unprotect_metachars(s)
  -- The order matters: "%%" must be handled last
  local specials = { "$", "^", "[", "?", "*", "-", "+", ".", ")", "(" }

  for _, char in ipairs(specials) do
      -- "%%" matches a literal percent sign, "%" .. char matches char
      s = string.gsub(s, "%%%" .. char, char)
  end

  s = string.gsub(s, "%%%%", "%%")
  return s
end

-- Returns s without leading and trailing whitespace
function trim(s)
  local stripped = string.gsub(s, '^%s*(.-)%s*$', '%1')
  return stripped
end

-- Returns s without trailing whitespace (leading whitespace is kept)
function trimRight(s)
  local stripped = string.gsub(s, '^(.-)%s*$', '%1')
  return stripped
end

-- Converts a user-supplied string into a lua pattern. A string that
-- starts with "////" is taken to be a pattern already and is returned
-- without that prefix; anything else is escaped so that it matches
-- literally, except that each run of whitespace matches any run of
-- whitespace.
function stringToPattern(s)
  if string.find(s, '^////') then
      return string.sub(s, 5)
  end

  local literal = protect_metachars(s)
  local flexible = string.gsub(literal, "%s+", "%%s+")
  return flexible
end

-- Splits a multi-line string into a table with one element per line.
-- Line terminators ("\n", optionally preceded by "\r") are not part
-- of the elements; text after the last terminator, if any, becomes
-- the last element.
function linesToTable(s)
  local result = {}
  local total = string.len(s)
  local pos = 1

  while pos <= total do
      -- also accept \r in case the user wrongfully added to this file
      -- a pattern with an embedded dos-style "CR LF" sequence.
      local start, stop, text = string.find(s, '(.-)[\r]?\n', pos)

      if start ~= nil then
          table.insert(result, text)
          pos = stop +1
      else
          -- no more terminators: the remainder is the last line
          table.insert(result, string.sub(s, pos))
          pos = total +1
      end
  end

  return result
end

-- adapted from https://www.lua.org/pil/19.3.html
--
-- Returns an iterator for use in a generic "for" that traverses
-- table t in the order of its keys. The optional function f is the
-- comparison function passed to table.sort().
function pairsSortedByKeys (t, f)
  local keys = {}
  for key in pairs(t) do keys[#keys + 1] = key end
  table.sort(keys, f)

  local idx = 0 -- position of the last key handed out
  return function ()
      idx = idx + 1
      local key = keys[idx]
      if key == nil then return nil end
      return key, t[key]
  end
end

-- Joins the items of list into a string such as "pages 1, 2, 3".
--
-- list:     sequence of strings and/or numbers
-- singular: label used when the list has exactly one item
-- plural:   label used when the list has more than one item
-- sep:      separator placed (followed by a space) between items;
--           defaults to ','
--
-- Returns "" for an empty list.
function listToCommaSeparatedString(list, singular, plural, sep)
  sep = sep or ','
  if #list == 0 then return "" end

  local label = plural
  if #list == 1 then label = singular end

  -- The separator is only inserted *between* items. Appending it
  -- after every item and then chopping a fixed number of characters
  -- from the end only works when the separator is exactly one
  -- character long.
  local result = label .. " "
  for idx, item in ipairs(list) do
      if idx > 1 then result = result .. sep .. " " end
      result = result .. item
  end

  return result
end

-- The helpers below all work the same way: they return s preceded by
-- the corresponding global (GREEN, BGREEN, RED, YELLOW or BRIGHT) and
-- followed by RESET_COLOR.
-- NOTE(review): presumably these globals hold terminal escape
-- sequences; they are defined outside this section - confirm.

function green(s)
  return GREEN .. s .. RESET_COLOR
end

function bgreen(s)
  return BGREEN .. s .. RESET_COLOR
end

function red(s)
  return RED .. s .. RESET_COLOR
end

function yellow(s)
  return YELLOW .. s .. RESET_COLOR
end

function bright(s)
  return BRIGHT .. s .. RESET_COLOR
end


--[[ ##### STACK ##### ]]--

-- A minimal LIFO stack; the elements are kept in
-- the array part of the object itself.
Stack = {}

-- Creates a new, empty stack
function Stack:new()
  self.__index = self
  return setmetatable({}, self)
end

-- Puts val on top of the stack
function Stack:push(val)
  table.insert(self, val)
end

-- Removes and returns the top element (nil if the stack is empty)
function Stack:pop()
  if #self == 0 then return nil end
  return table.remove(self)
end

-- Returns the top element without removing it (nil if empty)
function Stack:peek()
  local top = #self
  return self[top]
end

-- Number of elements currently stored
function Stack:size()
  return #self
end

-- true if there are no elements stored
function Stack:empty()
    return self:size() == 0
end


--[[ ##### QUEUE ##### ]]--
-- Adapted from https://www.lua.org/pil/11.4.html (in 2022)
-- We use this for the input line counter

-- A double-ended queue. The elements are stored in the object itself
-- under consecutive integer keys, from self.first to self.last; the
-- queue is empty when self.first > self.last.
Queue = {}

-- Creates a new, empty queue
function Queue:new()
  local o = { first = 0, last = -1 }
  self.__index = self
  return setmetatable(o, self)
end

-- Adds val at the left end
function Queue:pushleft(val)
  local slot = self.first -1
  self.first = slot
  self[slot] = val
end

-- Adds val at the right end
function Queue:pushright(val)
  local slot = self.last +1
  self.last = slot
  self[slot] = val
end

-- Removes and returns the leftmost element (nil if empty)
function Queue:popleft()
  local slot = self.first
  if slot > self.last then return nil end

  local val = self[slot]
  self[slot] = nil
  self.first = slot +1
  return val
end

-- Removes and returns the rightmost element (nil if empty)
function Queue:popright()
  local slot = self.last
  if self.first > slot then return nil end

  local val = self[slot]
  self[slot] = nil
  self.last = slot -1
  return val
end

-- Returns the leftmost element without removing it (nil if empty)
function Queue:peekleft()
  if self.first <= self.last then return self[self.first] end
  return nil
end

-- Returns the rightmost element without removing it (nil if empty)
function Queue:peekright()
  if self.first <= self.last then return self[self.last] end
  return nil
end

-- Number of elements currently stored
function Queue:size()
  return self.last - self.first +1
end

-- true if there are no elements stored
function Queue:empty()
    return self.first > self.last
end


--[[ ##### GLOBTOPATTERN ##### ]]--

-- convert a file glob to a lua pattern

-- globtopattern (c) 2008-2011 David Manura.  Licensed under the same terms as Lua (MIT).
-- copied verbatim from https://github.com/davidm/lua-glob-pattern

-- Converts the file glob g into a lua pattern string, one character
-- at a time. The result is anchored: it starts with "^" and, when the
-- whole glob is consumed normally, ends with "$". "?" becomes ".",
-- "*" becomes ".*", "[...]" becomes a lua character set and a
-- backslash makes the following character literal. When a problem is
-- detected (for example, an unterminated set), the whole pattern is
-- replaced by '[^]' and the conversion stops.
-- NOTE(review): '[^]' is presumably meant as a marker for an invalid
-- glob; confirm against the upstream documentation how callers are
-- expected to deal with it.
function globtopattern(g)
  -- Some useful references:
  -- - apr_fnmatch in Apache APR.  For example,
  --   http://apr.apache.org/docs/apr/1.3/group__apr__fnmatch.html
  --   which cites POSIX 1003.2-1992, section B.6.

  local p = "^"  -- pattern being built
  local i = 0    -- index in g
  local c        -- char at index i in g.

  -- unescape glob char
  -- (if c is a backslash, advance to the char that follows it;
  -- returns false if there is no such char)
  local function unescape()
    if c == '\\' then
      i = i + 1; c = g:sub(i,i)
      if c == '' then
        p = '[^]'
        return false
      end
    end
    return true
  end

  -- escape pattern char
  -- (a single alphanumeric char is returned as is, anything
  -- else is prefixed with "%")
  local function escape(c)
    return c:match("^%w$") and c or '%' .. c
  end

  -- Convert tokens at end of charset.
  -- (handles single chars and "a-b" ranges up to the closing "]";
  -- returns false if the glob ends before the set is closed)
  local function charset_end()
    while 1 do
      if c == '' then
        p = '[^]'
        return false
      elseif c == ']' then
        p = p .. ']'
        break
      else
        if not unescape() then break end
        local c1 = c
        i = i + 1; c = g:sub(i,i)
        if c == '' then
          p = '[^]'
          return false
        elseif c == '-' then
          i = i + 1; c = g:sub(i,i)
          if c == '' then
            p = '[^]'
            return false
          elseif c == ']' then
            -- a "-" right before the closing "]" is a literal dash
            p = p .. escape(c1) .. '%-]'
            break
          else
            if not unescape() then break end
            p = p .. escape(c1) .. '-' .. escape(c)
          end
        elseif c == ']' then
          p = p .. escape(c1) .. ']'
          break
        else
          p = p .. escape(c1)
          i = i - 1 -- put back
        end
      end
      i = i + 1; c = g:sub(i,i)
    end
    return true
  end

  -- Convert tokens in charset.
  -- (called right after a "["; a leading "^" or "!" negates the set)
  local function charset()
    i = i + 1; c = g:sub(i,i)
    if c == '' or c == ']' then
      p = '[^]'
      return false
    elseif c == '^' or c == '!' then
      i = i + 1; c = g:sub(i,i)
      if c == ']' then
        -- ignored
      else
        p = p .. '[^'
        if not charset_end() then return false end
      end
    else
      p = p .. '['
      if not charset_end() then return false end
    end
    return true
  end

  -- Convert tokens.
  while 1 do
    i = i + 1; c = g:sub(i,i)
    if c == '' then
      -- end of the glob: anchor the pattern and finish
      p = p .. '$'
      break
    elseif c == '?' then
      p = p .. '.'
    elseif c == '*' then
      p = p .. '.*'
    elseif c == '[' then
      if not charset() then break end
    elseif c == '\\' then
      i = i + 1; c = g:sub(i,i)
      if c == '' then
        -- the glob ends with a backslash: a backslash
        -- followed by "$" is appended to the pattern
        p = p .. '\\$'
        break
      end
      p = p .. escape(c)
    else
      p = p .. escape(c)
    end
  end
  return p
end


--[[ ##### PARSING THE COMMAND LINE ##### ]]--

-- loosely inspired by http://lua-users.org/wiki/AlternativeGetOpt

-- Parses the command line.
--
-- args:            sequence with the command-line arguments
-- optionsWithArgs: string with the single-letter options that take
--                  a parameter (as in "-b5" or "-a true"); may be nil
--
-- Returns a table indexed by option name; each option found is
-- recorded with simpleGetoptStoreVal(). The argument that does not
-- start with "-" is stored under the key 'filename'.
function simpleGetopt(args, optionsWithArgs)
  local userOptions = {} -- results will be stored here

  if optionsWithArgs == nil then optionsWithArgs = "" end

  -- This must be local, otherwise we create (or
  -- overwrite) a global variable called "i"
  local i = 1
  while i <= #args do

    -- lua does not have "continue", so we put the loop body
    -- in a "repeat/until true" block and use break instead.
    repeat
        local optname, optval

        -- this handles "--option=blah", "--option = blah",
        --              "--option= blah" and "--option =blah"
        if string.sub(args[i], 1, 2) == "--" then
            optname = string.sub(args[i], 3)

            -- check for "--option=..."
            local equals = string.find(optname, "=", 1, true)
            if equals ~= nil then
                optval = string.sub(optname, equals +1)
                optname = string.sub(optname, 1, equals -1)

            -- check for "--option =..."
            elseif i +1 <= #args and string.sub(args[i +1], 1, 1) == '=' then
                optval = string.sub(args[i +1], 2)
                i = i +1  -- do not process this again later on
            end

            if optval ~= nil then
                if optval == "" then -- check for "...= blah"
                    optval = args[i +1]
                    i = i +1 -- do not process this again later on
                end
            else
                -- check for "--option" without "="
                optval = true
            end

            simpleGetoptStoreVal(userOptions, optname, optval)

            break
        end

        -- this handles "-a -b", "-ab", "-cVAL", "-c VAL", "-abcVAL",
        -- and "-abc VAL". Obviously, "-cVALab" does not work (where
        -- does "VAL" end?).
        --
        -- To decide whether "-cVAL" means "c with param VAL" or
        -- "options c, V, A, and L", we check optionsWithArgs.
        if string.sub(args[i], 1, 1) == "-" then
            local j = 2
            local length = string.len(args[i])

            while (j <= length) do
                local optname = string.sub(args[i], j, j) -- a single letter
                if string.find(optionsWithArgs, optname, 1, true) then
                    if j < length then
                        -- the rest of this argument is the parameter
                        optval = string.sub(args[i], j +1)
                        j = length
                    else
                        -- the parameter is the next argument
                        optval = args[i +1]
                        i = i +1 -- do not process this again later on
                    end
                else
                    optval = true
                end

                simpleGetoptStoreVal(userOptions, optname, optval)

                j = j + 1 -- next letter
            end

            break
        end

        -- the filename is the only argument that does not start with "-"
        userOptions['filename'] = args[i]
    until true

    i = i +1 -- next arg

  end -- while i <= #args

  return userOptions
end

-- Records the value optval for option optname in userOptions.
--
-- The strings "true"/"y" and "false"/"n" (in any letter case) are
-- converted to booleans. Boolean values are stored directly and, if
-- the option name starts with "no-", the prefix is removed and the
-- value is negated. Other values are appended to a list, so that
-- an option may be given several times.
function simpleGetoptStoreVal(userOptions, optname, optval)
  if type(optval) ~= 'boolean' then
      local lowered = string.lower(optval)
      if lowered == "true" or lowered == "y" then
          optval = true
      elseif lowered == "false" or lowered == "n" then
          optval = false
      end
  end

  -- not a boolean: accumulate the values in a list
  if type(optval) ~= 'boolean' then
      local values = userOptions[optname]
      if type(values) ~= 'table' then
          values = {}
          userOptions[optname] = values
      end
      table.insert(values, optval)
      return
  end

  -- boolean: "--no-something" means "--something=false"
  if string.find(optname, "^no-") then
      userOptions[string.sub(optname, 4)] = not optval
  else
      userOptions[optname] = optval
  end
end

--[[
-- TESTING THE COMMAND LINE PARSER --

optionsWithArgs = 'abdg' -- these are followed by a parameter: "-b5", "-a true"

args = {
  '--should-be-true',
  '-a', 'param-for-a',
  '-bparam-for-b',
  '-cdparam-for-d', -- c should be true
  '-e', -- e should be true
  '-fg', 'param-for-g', -- f should be true
  '--opt1', '=', 'param-for-opt1',
  '--opt2=', 'param-for-opt2',
  '--opt3', '=param-for-opt3',
  '--opt4=param-for-opt4',
  '--also-should-be-true'
}

for k, v in pairs(simpleGetopt(args, optionsWithArgs)) do
    print("Option " .. k .. ': ', v)
end
--]]


--[[ ##################################################################### ]]--
--[[ ######################### INPUT LINES BUFFER ######################## ]]--
--[[ ##################################################################### ]]--

--[[
This is a buffer of lines from the logfile. The first line in the
buffer (self.current) is treated specially: we may alter its content
during processing and, therefore, we record its original size so
later we can detect if it may be a wrapped line (i.e., if its
original length was max_print_line characters).
--]]

-- The buffered lines are stored in the array part of this table
-- itself (see Lines:append() and Lines:gotoNextLine()).
Lines = {}
-- One entry per buffered line, with the result of Lines:seemsWrapped()
-- for that line or "unknown" if it was not computed yet
Lines.wrapped = {} -- memoization
-- Updated by Lines:gotoNextLine() and reported in error messages
Lines.linenum = 0
-- Counts of the blank lines that Lines:append() did not store
Lines.blankLines = Queue:new()

-- Makes the first buffered line the current one (self.current), along
-- with its "wrapped" memo, and updates the line counter. If the buffer
-- is empty, self.current becomes nil.
function Lines:gotoNextLine()
  local line = table.remove(self, 1)
  local memo = table.remove(self.wrapped, 1)

  self.current = line
  self.currentWrapped = memo
  self.atBeginningOfLine = true

  -- blank lines are not stored in the buffer,
  -- but they have to be counted too
  local count = self.linenum +1
  if not self.blankLines:empty() then
      count = count + self.blankLines:popleft()
  end
  self.linenum = count

  -- When unwrapping lines, we need to check whether a line is of
  -- the "right" size. However, the content of the current line is
  -- modified during processing, so we record its initial length
  -- here. See Lines:wrappingLength().
  if line == nil then
      self.currentLineInitialLength = 0
      self.currentWrapped = false
  elseif XETEX then
      self.currentLineInitialLength = utf8.len(line)
  else
      self.currentLineInitialLength = string.len(line)
  end
end

-- Called after a handler processes the current line to indicate how
-- much of it was consumed. With a number n, the first n characters
-- are dropped and the rest is left for the next handler; with nil,
-- the whole line is discarded, which indicates that it is time to
-- get more data and call gotoNextLine().
function Lines:handledChars(n)
  if n ~= nil then
      self.current = string.sub(self.current, n +1)
      self.atBeginningOfLine = false
      return
  end

  self.current = nil
end

-- Returns the length of the line at position n of the buffer. For
-- position 0 (the default), this is the length the current line had
-- when it became the current line. With XeTeX, the other lines are
-- measured with utf8.len() instead of string.len().
function Lines:len(n)
  local idx = n or 0
  if idx == 0 then return self.currentLineInitialLength end

  local measure = XETEX and utf8.len or string.len
  return measure(self[idx])
end

-- Adds the line x to the end of the buffer. Blank lines are not
-- stored, only counted (in self.blankLines).
function Lines:append(x)
    local blanks = self.blankLines

    if x ~= "" then
        blanks:pushright(0)

        table.insert(self, x)
        table.insert(self.wrapped, "unknown") -- cannot use "nil"
        return
    end

    -- We *need* to leave blank lines out because sometimes a wrapped
    -- line is followed by a blank line, which messes our detection of
    -- wrapped lines. However, the line we skip here is not the line we
    -- are processing right now; we will encounter it in the future. So,
    -- to keep line numbers right, we record it to adjust the count
    -- later (in gotoNextLine).
    local pending = 1
    if not blanks:empty() then
        pending = blanks:popright() +1
    end
    blanks:pushright(pending)
end

-- Returns the line at position n of the buffer; position
-- 0 (the default) is the current line
function Lines:get(n)
    local idx = n or 0
    if idx ~= 0 then return self[idx] end
    return self.current
end

-- true if there are no lines waiting in the buffer
-- (the current line is not taken into account)
function Lines:empty()
    return #self == 0
end

-- Number of lines waiting in the buffer
-- (the current line is not taken into account)
function Lines:numLines()
    return #self
end


--[[ ##### UNWRAPPING LINES ##### ]]--

-- Returns the memoized result of seemsWrapped() for the line
-- at position n (0 is the current line)
function Lines:seemsWrappedMemo(n)
    if n ~= 0 then return self.wrapped[n] end
    return self.currentWrapped
end

-- Memoizes val as the result of seemsWrapped() for the
-- line at position n (0 is the current line)
function Lines:setWrappedMemo(n, val)
    if n ~= 0 then
        self.wrapped[n] = val
        return
    end

    self.currentWrapped = val
end

-- Joins the current line and the next line of the buffer; the
-- result becomes the current line.
function Lines:unwrapOneLine()
  -- We trust the caller to check :seemsWrapped() before
  -- calling this!
  local old = self.current
  self:gotoNextLine() -- this also updates currentLineInitialLength
  -- gotoNextLine() replaced self.current with the next line, so
  -- we put what remained of the previous line back in front of it
  self.current = old .. self.current
end

-- Tells whether the line at the given position of the buffer (default
-- 0, the current line) seems to continue on the next line. The result
-- is memoized per line. Always false if badLogfile is not set.
function Lines:seemsWrapped(position)
  if not badLogfile then return false end

  if position == nil then position = 0 end

  local memo = self:seemsWrappedMemo(position)
  if memo ~= 'unknown' then return memo end

  -- No next line, so this line cannot be a wrapped line. The buffer
  -- should be large enough to guarantee this will only happen when
  -- we are actually at the end of the input BUT, just in case the
  -- buffer does get empty before EOF, we do not memoize this result.
  if self:get(position +1) == nil then return false end

  -- If the line length suggests this line continues on the next,
  -- we still need to be sure; we check that there are no handlers
  -- that can manage the next line.
  local verdict = false
  if self:wrappingLength(position) then
      verdict = self:noHandlersForNextLine(position)
  end

  self:setWrappedMemo(position, verdict)
  return verdict
end

-- Tells whether the length of the line at the given position of the
-- buffer is compatible with the line being wrapped by TeX, i.e.,
-- continuing on the next line. The caller must make sure that the
-- next line (position +1) exists.
function Lines:wrappingLength(position)
  local n = self:len(position) -- with XeTeX, this uses utf8.len()

  -- pdfTeX and XeTeX simply wrap at max_print_line (default 79):
  -- pdfTeX counts bytes and XeTeX counts utf8 chars. I do not know
  -- whether "chars" here means "code points" or "graphemes"; lua's
  -- utf8.len() uses code points, which is probably good enough.
  --
  -- We must return here whatever the result is: the checks below
  -- only make sense for LuaTeX and would yield wrong results (or
  -- even fail) with the other engines.
  if not LUATEX then return n == max_print_line end

  -- With LuaTeX, we need to handle a few special cases.

  -- LuaTeX sometimes "forgets" to wrap a line. If this happens, the
  -- line is not wrapped at all. Why do we do max_print_line +1 here?
  -- Because LuaTeX sometimes wraps at max_print_line and sometimes
  -- at max_print_line +1.
  if n > max_print_line +1 then return false end

  -- Is the line the "right" length?
  -- (max_print_line or max_print_line +1)
  if n >= max_print_line then return true end

  -- Ok, n < max_print_line, so it looks like this is not a wrapped
  -- line. BUT! LuaTeX tries to wrap lines by counting bytes just as
  -- pdfTeX (and differently from XeTeX). However, it does not break
  -- a multibyte UTF-8 character (which is obviously good). This means
  -- some lines may be broken at lengths < max_print_line; let's check
  -- for this.

  -- Get the length of the first UTF-8 character on the next line:
  -- https://www.lua.org/manual/5.3/manual.html#pdf-utf8.charpattern
  local _, nextcharsize = string.find(self:get(position +1),
                                      '^' .. utf8.charpattern)

  -- If the next line does not start with a valid UTF-8 sequence
  -- there is no match; in this case, or if the character is not a
  -- multibyte char, the line really is not wrapped
  if nextcharsize == nil or nextcharsize == 1 then return false end

  -- Some *very* simplistic experiments tell me that if a
  -- multibyte UTF-8 character in the last position of a line
  -- would make that line exactly max_print_line long, LuaTeX
  -- still breaks the line, even though the character would fit.
  -- This is why we do "+1" here.

  -- If the multibyte char would fit, the line really is not wrapped
  if n + nextcharsize +1 <= max_print_line then return false end

  -- The line is shorter because of an UTF-8 multibyte
  -- char, so it seems like a wrapped line
  return true
end

-- Returns true if none of the handlers in beginningOfLineHandlers and
-- anywhereHandlers reports (with canDoit) that it can process the
-- line that follows the given position.
function Lines:noHandlersForNextLine(position)
  local nextLine = position +1

  local function someoneCanDoit(handlers)
      for _, handler in ipairs(handlers) do
          if handler:canDoit(nextLine) then return true end
      end
      return false
  end

  if someoneCanDoit(beginningOfLineHandlers) then return false end
  if someoneCanDoit(anywhereHandlers) then return false end

  return true
end

-- Unwraps the current line (joins it with the following lines) until
-- the pattern pat matches it.
--
-- pat:        lua pattern
-- permissive: if true, lines are unwrapped even if Lines:seemsWrapped()
--             says the current line is not wrapped
--
-- Returns the position of the last character of the match and a table
-- with the captures of the pattern. If there is no match and it is not
-- possible to unwrap, a message is written to stderr, PARSE_ERROR is
-- set and the position returned is nil.
function unwrapUntilPatternMatches(pat, permissive)
  -- Both must be local, otherwise we create
  -- (or overwrite) global variables
  local last, matches
  while true do
      matches = {string.find(Lines.current, pat)}
      table.remove(matches, 1) -- remove "first"
      last = table.remove(matches, 1) -- remove "last"

      if last ~= nil then break end

      if Lines:seemsWrapped() or permissive then
          Lines:unwrapOneLine()
      else
          io.stderr:write("    texlogsieve: parsing error near input line "
                               .. Lines.linenum
                               .. " (unwrapUntilPatternMatches)\n")

          PARSE_ERROR = true
          break
      end
  end

  -- We matched the given pattern. However, if the pattern ends with
  -- something like ".+", it is possible that there is still some more
  -- relevant material in the next line (this is why we should not use
  -- such patterns). There is not much we can do about that in general,
  -- but if the next line is really short, this is probably why, so we
  -- might as well try to unwrap once more. This is obviously a hack,
  -- but it saves us from problems with a couple of messages that use
  -- "filepat" at the end of the pattern.

  -- Nothing to do if the next line does not exist
  if Lines:get(1) == nil then return last, matches end

  -- these might fit in 2 chars, so do not consider lines with that
  local realMessage = string.find(Lines:get(1), '[%)%[%]]')
  if string.len(Lines:get(1)) < 3 and not realMessage then
      local together = Lines.current .. Lines:get(1)
      local candidateMatches = {string.find(together, pat)}
      table.remove(candidateMatches, 1)
      local candidateLast = table.remove(candidateMatches, 1)
      if candidateLast ~= nil then
          Lines:unwrapOneLine()
          last = candidateLast
          matches = candidateMatches
      end
  end

  return last, matches
end

-- Unwraps the current line until the literal string s is found in it
-- (same return values as unwrapUntilPatternMatches). This is always
-- called with a string we know is there, so it should never fail.
-- Lines in the epilogue may be wrapped at "incorrect" line lengths,
-- which is why unwrapping here is permissive, i.e., it happens even
-- if "Lines:seemsWrapped()" returns false.
function unwrapUntilStringMatches(s)
  local literal = protect_metachars(s)
  return unwrapUntilPatternMatches(literal, true)
end


--[[ ##################################################################### ]]--
--[[ ################## GUESSING FILENAMES AND SHIPOUTS ################## ]]--
--[[ ##################################################################### ]]--

--[[
To find the full path of a file in the TeX search path, we may use

thepath = kpse.find_file("article")
thepath = kpse.find_file("/usr/share/texlive/texmf-dist/tex/latex/base/article.cls")
thepath = kpse.find_file("latex/base/article.cls")
thepath = kpse.find_file("texmf.cnf", "cnf")

The filename may or may not include the extension; kpathsea adds the
extension if necessary according to the specified format (default is
"tex", which means extensions .tex, .sty, .cls and some others). This
is problematic, because (1) it does not recognize filenames with an
unknown extension and (2) if there is a file named "what.ever.tex" and
another one called "what.ever", it may find the wrong file.

We want to check whether a string from the log file corresponds to the
complete path of an existing filename, which means we already know the
extension. Therefore, we will use the type "other text files". This
prevents kpathsea from adding the extension automatically (which is what
we want) and also prevents kpathsea from searching the standard TeX path,
which is also nice because we are checking a complete path.
--]]

-- Tries to find a filename at the beginning of the line at the given
-- position of the buffer (default 0, the current line), ignoring an
-- initial open parens. Returns the filename or nil.
function guessFilename(position)
  if position == nil then position = 0 end

  local text = Lines:get(position)
  local _, prefixEnd = string.find(text, '^%s*%(')
  if prefixEnd ~= nil then text = string.sub(text, prefixEnd +1) end

  local quoted, name = guessQuotedFilename(text, position)

  -- Only when there are no quotes at all
  -- it makes sense to try something else
  if name == nil and not quoted then
      return guessUnquotedFilename(text, position)
  end

  return name
end

-- Checks whether the line at the given position of the buffer (default
-- 0, the current line) starts with a shipout, after an optional open
-- bracket. If the line seems to be wrapped, the following lines are
-- taken into account too. Returns the page number as a string (it may
-- contain dots, as in "1.2.3") or nil if this is not a shipout.
function guessShipoutPage(position)
  -- In general, a shipout is identified by "[\count0.\count1.\count2..."
  -- followed by a space or "]". However, there are complications:
  --
  -- 1. Dates, such as [2020/10/03] or [2020-10-03]
  --
  -- 2. Files added to the shipout, such as
  --    "[3{somefile.tex}" or "[3<somefile.pdf>"
  --
  -- We can either:
  --
  -- 1. Specify what is allowed: "^(%d+[%d%.]*)[ %]<{]" or "^(%d+[%d%.]*)$"
  --
  -- 2. Exclude what is forbidden: "^(%d+[%d%.]*)[^/-]"
  --
  -- Let's go with option 1.

  local patterns = {
      "^(%d+[%d%.]*)[ %]<{]",
      "^(%d+[%d%.]*)$"
  }

  if position == nil then position = 0 end
  local line = Lines:get(position)
  local _, last = string.find(line, '^%s*%[')
  if last ~= nil then line = string.sub(line, last +1) end

  -- If this is a 79-chars line that ends with ...[NUMBER, we will try
  -- to unwrap. With that, we may find ...[LONGERNUMBER, which is what
  -- we want. If, however, we find [NUMBERLETTER, we give up and consider
  -- this not to be a shipout. Should we stick with [NUMBER instead?
  --
  -- No.
  --
  -- Either way, we have no way of knowing whether we are right or wrong,
  -- but giving up is the lesser evil: the user may register the unknown
  -- message with --add-[debug|info|warning]-message and solve the problem.
  local page
  while true do
      page = nil
      for _, pattern in ipairs(patterns) do
          -- The capture is just the number, which is what we want
          page = string.match(line, pattern)
          if page ~= nil then break end
      end

      if not Lines:seemsWrapped(position) then break end

      -- We need the line that follows the one we just checked, which
      -- is not necessarily the first line of the buffer: we may have
      -- unwrapped more than once or started at some other position.
      position = position +1
      line = line .. Lines:get(position)
  end

  return page -- may be nil
end

-- Checks whether line starts with a filename between double quotes,
-- unwrapping (from the given position of the buffer) if necessary.
-- luatex puts quotes around filenames with spaces, which makes
-- things easier.
--
-- Returns false if line does not start with a quote. Otherwise,
-- returns true and either the filename (including the quotes) or
-- nil if what is between the quotes is not an existing file.
function guessQuotedFilename(line, position)
  while true do

      -- no quotes
      if not (line == "" or string.find(line, '^"')) then return false end

      local _, closing = string.find(line, '^"' .. filepat .. '"')
      if closing ~= nil then
          local candidate = string.sub(line, 1, closing)
          if not checkIfFileExists(candidate) then
              return true, nil -- there are quotes, but no filename
          end
          return true, candidate
      end

      -- no closing quote or the line is too short;
      -- if we cannot unwrap this line, give up
      if not Lines:seemsWrapped(position) then
          io.stderr:write("    texlogsieve: parsing error near input line "
                               .. Lines.linenum .. " (guessQuotedFilename)\n")

          PARSE_ERROR = true
          return true, nil
      end

      -- we can, so try again with the next line added
      line = line .. Lines:get(position +1)
      position = position +1
  end
end

-- no quotes; let's exhaustively extract the substrings of currentLine
-- and use kpse.find_file() to check if any of them is an existing
-- filename.
--
-- line is the text to examine and position is the position, in the
-- Lines buffer, of the line it came from (used to unwrap). Returns
-- the filename found or nil.
function guessUnquotedFilename(line, position)
  -- In subsequent iterations, we do not check the smaller strings
  -- we already checked in the previous ones.
  local alreadyCheckedIdx = 0

  local filename
  while true do

      local longest = string.len(line)
      local notWrapped = false

      -- if the line contains one of these chars, stop before it
      local first = string.find(line, "[%(%)%[%]%{%}%<%>]")
      if first ~= nil then
          notWrapped = true -- this line is obviously not wrapped
          longest = first -1
      end

      -- From longest to shortest, to avoid problems if there is a
      -- substring in the filename that matches some other filename.
      for i = longest, alreadyCheckedIdx +1, -1 do
          local candidate = string.sub(line, 1, i)

          -- We are gradually removing chars from the end of the string.
          -- We could just do that, but there is a possible optimization:
          -- if the string is part of a filename that is wrapped and we
          -- reach a slash, only the directories remain and we do not need
          -- to continue reducing, we can proceed to unwrap the line.
          -- However, the line may not be wrapped at all: instead, it
          -- might include a (shorter) filename followed by something else
          -- (such as a URL) that includes a slash character. So, we can
          -- only optimize if we know for sure that this is not the case. We
          -- can be sure that this is not the case when there are no spaces
          -- in the candidate string.
          if not string.find(candidate, ' ')
              and string.sub(candidate, #candidate) == "/" then break end

          if checkIfFileExists(candidate) then
              filename = candidate
              break
          end
      end

      -- We may or may not have found the filename. Either way, if we
      -- can unwrap this line, we should:
      -- 1. If we did not find the filename, we might
      --    find it after unwrapping
      -- 2. If we did find the filename, we might find a different
      --    (longer) filename, in which case we should stick with
      --    it (yes, I have seen it happen!)
      if notWrapped or not Lines:seemsWrapped(position) then
          return filename
      end

      -- Try again with the next line added to what we have
      alreadyCheckedIdx = longest
      line = line .. Lines:get(position +1)
      position = position +1
  end
end

-- Looks for the .fls file that corresponds to the given log file (the
-- same name with extension ".fls" instead of ".log"). If it exists and
-- is not stale, sets USE_FLS_FILE and fills the global "filelist" with
-- the (normalized) names of the files mentioned in it, to be used by
-- checkIfFileExistsWithFls(). Otherwise, does nothing.
function readFls(logfilename)
  if not string.find(logfilename, '%.log$') then return end

  local flsfilename = string.gsub(logfilename, '%.log$', '.fls')

  -- It is ok for this to fail, so no "assert"
  local flsfile = io.open(flsfilename, 'r')
  if flsfile == nil then return end

  -- Let's be reasonably sure that we are not dealing
  -- with a stale .fls file from a previous run. If we
  -- cannot get the date of the logfile, we cannot be
  -- sure, so we treat this as a stale file too.
  local logdate = lfs.attributes(logfilename, 'modification')
  local flsdate = lfs.attributes(flsfilename, 'modification') or 0
  if logdate == nil or math.abs(logdate - flsdate) > 5 then -- seconds
      -- do not leave the file open
      flsfile:close()
      return
  end

  -- We are good to go!
  USE_FLS_FILE = true

  filelist = {}
  while true do
      local line = flsfile:read("*line")

      if line == nil then break end

      -- If we are running in a unix-like OS but the files we are
      -- processing were generated in Windows, lua may leave a \r
      -- character at the end of the line; if this happens, remove it
      local _, last = string.find(line, '\r$')
      if last ~= nil then line = string.sub(line, 1, last -1) end

      -- Remove the "INPUT " or "OUTPUT " prefix
      _, last = string.find(line, '^[IO][NU]T?PUT ')
      if last ~= nil then line = string.sub(line, last +1) end

      -- I don't think this ever happens
      if string.find(line, '^".*"$') then line = string.sub(line, 2, -2) end

      -- No idea what is this, but it happens with the MiKTeX version
      -- of luatex for Windows. It apparently only appears with font
      -- files, which are irrelevant here, but let's be safe
      if string.find(line, '^\\\\%?\\') then line = string.sub(line, 5) end

      line = string.gsub(line, '\\', '/')

      _, last = string.find(line, '^%./')
      if last ~= nil then line = string.sub(line, last +1) end

      -- Save as a Set to eliminate duplicates
      if not string.find(line, '^PWD') then filelist[line] = true end
  end

  flsfile:close()
end

-- Checks whether filename is among the files listed in the .fls file
-- (see readFls()). An initial "./" in filename is ignored. Returns
-- true or false.
function checkIfFileExistsWithFls(filename)
  if string.find(filename, '^%./') then filename = string.sub(filename, 3) end

  -- filelist is a Set (the filenames are the keys), so
  -- there is no need to traverse it, we can just look up
  return filelist[filename] ~= nil
end

-- Checks with kpse.find_file() whether filename exists, using the
-- type 'other text files'. Returns true or false.
function checkIfFileExistsWithKpse(filename)
  -- Does the name include a path? This is the case if
  -- it starts with "C:/" (or some other drive letter)...
  local hasPath = string.find(filename, '^%a%:/') ~= nil

  -- ...or with "./" or just "/"
  if string.find(filename, '^[%.]?/') then hasPath = true end

  -- If this is something like "blah.ext", make it "./blah.ext",
  -- otherwise kpse.find_file may find some file in the TeX path
  -- that has nothing to do with us.
  if not hasPath then filename = './' .. filename end

  return kpse.find_file(filename, 'other text files') ~= nil
end

-- Checks whether filename (with or without double quotes around
-- it) corresponds to an existing file. Returns true or false.
function checkIfFileExists(filename)
  local name = filename

  -- If there are quotes, remove them
  if string.find(name, '^".*"$') then
      name = string.sub(name, 2, -2)
  end

  -- MiKTeX for windows does not necessarily use the same type of
  -- slashes in paths inside the log file and the .fls file. I also
  -- do not know if kpse.find_file works ok with backslashes. Let's
  -- just turn everything to forward slashes and be done with it.
  name = string.gsub(name, '\\', '/')

  -- If we are reading a logfile (not stdin) and there is a
  -- corresponding .fls file, check it first. Maybe this is slightly
  -- more reliable, but the real advantage is that we can process a
  -- logfile even if we are not on the same environment that created
  -- it, which is nice for testing, reporting bugs, etc.
  -- Unfortunately, some files such as images may sometimes not be
  -- included in the .fls file, so if the file is not there we still
  -- check with kpse.
  if USE_FLS_FILE and checkIfFileExistsWithFls(name) then return true end

  return checkIfFileExistsWithKpse(name)
end


--[[ ###################################################################### ]]--
--[[ ######################## BEGINNING OF SCRIPT ######################### ]]--
--[[ ###################################################################### ]]--


-- Everything above only defines things; this is where the script
-- actually starts running. main() is defined outside this section
-- and receives the table with the command-line arguments.
main(arg)