%% xmltex.tex

%% Copyright 2000 David Carlisle, NAG Ltd.
%% re-released by Sebastian Rahtz June 2002
%% This file is distributed under the LaTeX Project Public License
%% (LPPL) as found at http://www.latex-project.org/lppl.txt
%% Either version 1.0, or at your option, any later version.

% Minimal catcodes needed before any macro can be defined: { and } as
% group delimiters (1, 2), @ as a letter (11) so that internal names
% such as \XML@tempa can be typed, and # as the parameter character (6).
\catcode`\{=1 \catcode`\}=2 \catcode`\@=11 \catcode`\#=6

% Scratch macro that picks apart an RCS/CVS keyword string of the form
%   <junk>: <name>.tex,v <revision> <date> <field5> <field6> <rest>$
% and defines \xmltexversion as `<date> v<revision> (<field6>)'.
% #1 = text before the first `: '     #2 = file base name
% #3 = revision                       #4 = date
% #5, #6 = the next two space-separated fields
% #7 = whatever remains before the closing $
% NOTE(review): with the Id string that follows (no separate time field)
% #5 is the author and #6 is `Exp' -- confirm that is the intended output.
\gdef\XML@tempa#1: #2.tex,v #3 #4 #5 #6 #7${
  \def\xmltexversion{#4 v#3 (#6)}}

$Id: xmltex.tex,v 1.9 2002/06/25 rahtz Exp $


% initial setup so that xmltex is independent of any existing format,
% even if it is normally built on top of latex


% Two catcode changes used by the rest of the file:
% | becomes a comment character (catcode 14), ~ becomes active (catcode 13).
\catcode`\|=14 %
\catcode`\~=13 %

  % Hard-wired stand-ins for names a format would normally allocate.
  % \count@, \@tempcnta and \@tempcntb are simply pointed at count
  % registers 200, 202 and 203 -- nothing checks that they are free.
  \countdef\count@200 % fingers crossed
  % \newcount and \newtoks become macros that swallow their argument and
  % allocate nothing.
  \expandafter\def\csname newcount\endcsname#1{}
  \expandafter\def\csname newtoks\endcsname#1{}
  % \space expands to one space.
  \def\space{ }
  % \active is the character constant 13 (the catcode of active characters).
  \chardef\active13 %
  % \z@ is made a name for count register 0.
  \countdef\z@0 %
  \countdef\@tempcnta202 % fingers crossed
  \countdef\@tempcntb203 % fingers crossed

% set up 7bit range
      \ifnum\catcode\count@=11 %
    \expandafter\edef\csname 8:\string^^@\endcsname{\string^^@}}
 \ifnum\count@<127\advance\count@1 \expandafter\XML@tempa\fi}

\catcode`\/0 \catcode`\\=\active /gdef\(/utfeightay\)



  \catcode`\^7 %

% This is standard in LaTeX.
% As defined here, #1 is everything up to the first space token; that
% space is removed and #1 is put back unchanged.
% NOTE(review): the LaTeX kernel's \zap@space takes a second argument and
% loops until it meets \@empty, removing every space; this one-shot form
% removes only the first one -- confirm that is enough for the callers.
\def\zap@space#1 {#1}


% \input{<file>}: pass <file> to \@@input, followed by a space so that the
% file name is terminated.
% NOTE(review): \@@input is not defined in the visible part of this file;
% presumably it is the saved \input primitive -- confirm.
\def\input#1{\@@input#1 }

  % Stub \IfFileExists{<file>}{<true>}{<false>}: performs no test at all.
  % It stores `<file> ' (with a trailing space) in \@filef@und and always
  % runs <true>; the <false> code (#3) is discarded.
  \def\IfFileExists#1#2#3{\def\@filef@und{#1 }#2}


\immediate\write20{xmltex version: \xmltexversion}




% set catcodes of low chars to 12 and hight 10 13.


% white
  \catcode`\ \active
% xml
% tex
% and these are not catcodes

\catcode`\ 10

%% half baked bom and utf-16 support

% do this also in everyjob in case a <?xmltex dump?> has turned it off.


%% warnings


% quoted literals
% " or '
% #1 command to call, gets quoted string as #1
% #2 " or '
% " and ' assumed active



% < handler

% / ! ? assumed active
  \catcode`\^^I=10 %
  \catcode`\^^M=10 %
  \catcode`\ =10 %


%% reset catcodes
  \catcode`\ =10 
% catcodes?

%% begin tag
  \catcode`\ \active
% #1 first char of name
% grab the name into an xdef so that the terminating string can
% be any of whitespace, / or >
\catcode`\ \active
\def {\iffalse{\fi}\XML@getname@}
\let^^M %
\let^^I %


% finish the special group for the name xdef, start an XML@begingroup
% for the element, and begin processing any attributes.
  \edef\XML@w@{ \XML@w@}

% elements are put in an \XML@begingroup which is not a group at the
% outer level, to save save-stack space, but turns itself into a group
% for nested elements.


%% attributes

% #1 first letter of attribute name, or / or > to finish.



% restore normal XML whitespace regime
% namespace check element name (has to be done after attribute handling)
% trace the element start then `do' the element which might be
% noop, execute package code, or grab, depending.

  \catcode`\ \active
    \csname XMLNS@\XML@this@prefix\expandafter\endcsname\noexpand:


% This is a noop if tracing turned off, otherwise loop through
% attlist with a typeout on each.
    \XML@trace@warnE{ >}

% execute package code for an element start.


% grab element content into a token register.
% has to pass through xdef to normalise encodings.
\catcode`\ \active\catcode`\^^M\active\catcode`\^^I\active
% check if I can switch this just once, where grabelement switched.
   \expandafter\let\csname XMLNS@0\endcsname\XMLNS@
\catcode`\ \active\catcode`\^^M\active\catcode`\^^I\active

% #1  namespace
% #2  local name
% #3  value

  \protected@xdef\XML@tempa{\csname XMLNS@#1\endcsname:#2}
    \XML@tempa="#3" }}

% #1 should be empty, between the / and the >
% probably should put some internal form here rather than literally adding
% end tag to be reparsed, but this simplifies grab code.

%% check we know what to do with an element
% catcode :?    \csname E:\XML@this@element\endcsname
    \csname E:\csname XMLNS@\XML@this@prefix\endcsname:\XML@this@local\endcsname
   \ifnum0=\csname XMLNS@\XML@this@prefix\endcsname
% prerun catalogue in this case which might coerce element into a new namespace
     \edef\XML@NAMESPACE{\csname XMLNS@\XML@this@prefix\endcsname}
   \expandafter\ifx\csname E:\csname

%% end tag

\catcode`\ \active
% removed by Peter Housel housel@acm.org 2000/09/06 
% \catcode`\^^M\active\catcode`\^^I\active
  \XML@getend@a#2 \@}


% namespace normalise element name in end tag.
\gdef\XML@getend@a#1 #2\@{
\catcode`\ \active\catcode`\^^M\active\catcode`\^^I\active
  \XML@trace@warn{</\csname XMLNS@\XML@this@prefix\endcsname:\XML@this@local>}

% execute package code for element end.
    E/:\csname XMLNS@\XML@this@prefix\endcsname:\XML@this@local
% added by Peter Housel housel@acm.org 2000/09/06 
 \catcode`\^^M\active \catcode`\^^I\active \catcode`\ \active}

% flag children: put \@empty after every child element
% used by \xmltextwochildren and friends

% need active and non active : and / so...

% end inner group to restore \XML@doelement and friends
    \XML@trace@warn{Grabbed content}
      E.*\csname XMLNS@\XML@this@prefix\endcsname
    \XML@trace@warn{End grabbed content}
% \XMLstring usage means catcode restoring varies
% must fix this one day, for now use ifnum avoidance
\catcode`\ \active\catcode`\^^M\active\catcode`\^^I\active
      \csname XMLNS@\XML@this@prefix\expandafter\endcsname\noexpand:

% syntax for these will probably change.
  \xmltexf@rall#1#2< >\@empty}

\def\xmltexf@rall#1#2<#3 #4>#5\@empty{
  \def\xml@name{#3}#1{<#3 #4>#5}

% #1  entity name (or tex macro holding same, eg from an attribute)
% #2 #3 inserted before each component
% so #2{GIF}#3{xxx/yyy.gif}





% this relies on = being catcode 13.

   \XML@set@this@attribute#1#2 \@


%% remove trailing space from `foo ='
\def\XML@set@this@attribute#1 #2\@{
% should probably do an edef at this point, and optimise
% later code

% #1 = attribute value
% allow for arbitrary catcodes.
% catcode avoidance
% check if this still needed with protect setting?
%       \def"{\noexpand&quot;}

%% activate attributes

% this seems over complicated and perhaps I should re-implement.
% currently tries to avoid making a csname for each attribute.
% declaration of attributes provides a normal tex command name
% to access the value in the element code, could  have instead
% just had declaration of attribute name and default and used something
% like \attributevalue{html:href} but would need to work out a way
% of resolving prefixes at definition time if this was embedded in
% the element code. (The prefix such as `html' used in the definition
% file isn't known by the time the code is run.) 
  \XML@trace@warn{\@spaces\csname XMLNS@#1\endcsname:#2 = \string"#3\string"}}

% #1 element specific attribute defaults, first token is
% macro for namespace-global attributes (hence \expandafter)

% #1 prefix
% #2 local name
% #3 value
       \noexpand\XML@attrib\csname XMLNS@#1\endcsname:#2\relax}}

% #1 \XML@attrib qname\relax
% #2 value given in document instance

% #1 \XML@attrib qname\relax
% #2 original attribute defaults

% if default is inherit, set it to \relax the first time, otherwise
% let whatever value it has drop through. Note this is inheritance of the
% tex csname declared as the internal access, not of the xml attribute name. 
% #1 = junk
% #2 = tex csname
% #3 = attribute default (should be encoding neutral: not normalised)
  \ifx\inherit#3\relax% #3 might be empty

% any distinguishing value would do...


%%%%% namespace declarations


% need to protect against active chars

% #1 prefix (or empty)
% #2 uri
% globally allocate number to uri if new
% locally allocate prefix to number
% globally allocate number as a prefix (canonical form)
  \expandafter\edef\csname XMLNS@#1\endcsname
                      {\csname XML:#2\endcsname}
  \XML@trace@warn{xmlns:#1 = \csname XMLNS@#1\endcsname}

% and the same without any prefix
% always use in scope of utfeight protect
% #1 uri
  \expandafter\ifx\csname XML:#1\endcsname\relax
    \expandafter\xdef\csname XML:#1\endcsname{\the\XML@ns@count}
    \global\expandafter\let\csname A:\the\XML@ns@count\endcsname\@empty
    \XML@trace@warnNI{URI: \csname XML:#1\endcsname\space = #1}
    \expandafter\xdef\csname XMLNS@\the\XML@ns@count\endcsname

% and version for xmt files

%% namespace support

%% : is active in xml state but inactive in tex state, so need to do
%% this twice, grrr...

% #1 = qname to be split on :

% #1 = prefix (or empty)
% #2 = local name or \@ if no prefix

% same with inactive :
% #1 = qname to be split on :

% #1 = prefix (or empty)
% #2 = local name or \@ if no prefix



% Predeclared namespaces.  For namespace number <n> the three lines set
%   \csname XML:<uri>\endcsname  to <n>      (URI to number)
%   \csname A:<n>\endcsname      to \@empty  (presumably the list of
%                                   namespace-global attributes -- TODO confirm)
%   \csname XMLNS@<n>\endcsname  to <n>      (the number used as a prefix)
%
% null namespace: empty URI, number 0
\expandafter\def\csname XML:\endcsname{0}
\expandafter\let\csname A:0\endcsname\@empty
\expandafter\gdef\csname XMLNS@0\endcsname{0}

% xml namespace: number 1
\expandafter\def\csname XML:http://www.w3.org/1998/xml\endcsname{1}
\expandafter\let\csname A:1\endcsname\@empty
\expandafter\gdef\csname XMLNS@1\endcsname{1}

% #1 = prefix or empty
% #2 = local name

% pi
\catcode`\ \active

\catcode`\ \active
\let \endcsname


  \catcode`\ \active


% currently ? not reset by XML@reset
\expandafter\def \csname Q:xmltex\endcsname{


% #1 = piname or `piname Undefined'





%% XML and declarations
% only care about encoding. Ignore version and standalone.

% #1 content
% #2 end of test code

\expandafter\gdef \csname Q:xml\endcsname{
  \catcode`\ 10 %

  \catcode`\ \active
  \XML@encoding#1 e="utf-8"\relax}

% actually encoding supposed to be after version if it comes at all
% so I could simplify this and get rid of the loop.
\gdef\XML@encoding#1 #2{



% do nothing if newly specified encoding is same as old one
% #1 is name of encoding (upper or lower case, and dubious catcodes)
% #2 is junk

      \XML@trace@warnNI{Encoding = \XML@thisencoding}

% public version of same

% catcode neutral lowercase utf-8


%% internalise utf8 encoding as needed before every file include.


    \XML@trace@warnNI{Encoding = \XML@thisencoding}


  }{\XML@warn{No file: #1}}}

% in principle a parsed entity might just be text with no markup
% but the utf16 is so broken anyway don't worry about that.


%% declarations
% made safe against active chars.
% #1 = rest of if test
% #2#3 = first two characters after <!
  \if-\noexpand#2\XML@comment     %   --
  \else\if N\noexpand#3\XML@entity%   EN TITY
  \else\if L\noexpand#3\XML@dec@e%    EL EMENT
  \else\if A\noexpand#2\XML@dec@a%    AT TLIST
  \else\if D\noexpand#2\XML@doctype%  DO CTYPE
  \else\if C\noexpand#3\XML@cdata%    [C DATA 
  \else        \XML@dec@n%            NO TATION
%                                      could also pick up [IGNORE/[INCLUDE
%                                      but they are not allowed in internal subset.

%% Just skip element declarations
% #1 = rest of \if
% #2 ELEMENT declaration

% attribute declarations
% #1 = rest of if test + TLIST
% #2 = element name
\def\XML@dec@a#1 #2 {

\gdef\XML@dec@a@x#1 #2{


% #1 = enumerated attribute type tokens, up to )


% #1 = junk up to next space token
\def\XML@dec@a@type#1 {


% #1 = nextchar after space, if it is # step to next space
% otherwise look for possible " or ' or >




  \XML@warn{Default: \XML@tempa\space\XML@tempb="#1"}
% reusing this wastes some tests but only done in local subset


% this comparison is encoding normalised, but namespace unaware, grr.
% stop getattrib looking for nested xml syntax attribute setting.

%% comment
%  - is active
% #1 = rest of if test
% #2 = comment text
  \catcode`\ \active
  \XML@trace@warn{\string<!-- -->}



%% entity defs

% #1 = rest of \if test + TITY
% #2 = % or entity name

\gdef\XML@entity#1 #2 {
  \XML@trace@warn{\XML@ename; = }


% input some file at most once (and ignore arguments that
% expand to empty)
% done in a local group
  \expandafter\ifx\csname xmt:#1\endcsname\relax
  \global\expandafter\let\csname xmt:#1\endcsname\@ne
% package files should have their own namespace declarations
% don't want to inherit from some random point when file is loaded.
% in principle should clear all prefix assignments in local scope
% but I don't currently maintain a list of those, and not needed as long
% as package files declare all prefixes used. But do set the default
% namespace back to the null namespace.
% should force utf-8 as well.

% ignore include of empty filename
% The control sequence `xmt:<name>' is what the at-most-once test above
% compares with \relax; giving the empty name a meaning here marks it as
% already handled.
\expandafter\let\csname xmt:\endcsname\@ne

% \noexpand protect against active ascii
\gdef\XML@p@ent#1 #2{
  \XML@trace@warn{\XML@ename; = }

% #1 = next char( P or S for external entities)


% special `prefix' that just removes following colon.
% #1 = the single token (or group) that follows; it is discarded and
%      nothing is left in its place.
\expandafter\gdef\csname XMLNS@*\endcsname#1{}



%#1 = " or '
% make " or ' close the grab `element'. the nameless close tag is completed
% by the > coming from the ENTITY declaration syntax.
% Using a mangled grab code is a bit complicated but it allows
% catcode 10 simplification in the normal case of element handling
% and allows characters to be correctly normalised to utf8.
% stop xmlns `attribute' being recognised
% set up special prefix to gobble colon
% disable these as nothing will be known until namespaces reenabled
% hobble namespace code to put all name in local part.


% expandafter away an \else clause in grabelement then check for ]

% this hack has to undo the one above marked by
%   % \XMLstring usage means catcode restoring varies
% must fix this as well one day

% need to add (somewhere) a replacement of " to &quot; so that
% xxx='a"b"c'  doesn't end up as xxx="a"b"c"
% #1 replacement text
% #2 white space and  >

%% check for ]> that ends internal subset

% #1 is next token in local subset (normally < or >)
% after subset finishes % stops being markup, and the package
% relating to any external entity in the doctype is loaded.
  \catcode`\ 10 %

% #1 = next character


%% #1#2 just gobble ]>
  \catcode`\ \active
    \XML@trace@warn{Doctype Package: \XML@D@dtd}

% #1 = rest of \if test + PUBLIC
\def\XML@E@public#1 {

% #1 = FPI

% #1 = rest of if test up to SYSTEM
\def\XML@E@system#1 {

% #1 URL
% #2 next token, N or >
  \XML@trace@warn{\@spaces Public: \XML@PUBLIC}
  \XML@trace@warn{\@spaces System: \XML@SYSTEM}

% NDATA token terminated by > or white space
% #1 = DATA
% #2 ndata token with possible extra space
% A ` >' is appended before #2 is handed to \XML@ndata@, so that macro's
% `#1 #2>' pattern always finds a space, even when the token is followed
% directly by >.
\def\XML@E@ndata#1 #2>{\XML@ndata@#2 >}

% #1 = ndata token
% #2 = junk
\def\XML@ndata@#1 #2>{

% #1 rest of \if test + OCTYPE
% #2 document element name
% #3 P or S or [ or >
% noexpand for P and S, [ assumed active


\gdef\XML@doctype#1 #2 #3{
  \XML@trace@warn{Document Element: \documentelement}
  \else%must be > the end

\gdef\XML@D@empty @{

% #1 = rest of \if test + UBLIC
\gdef\XML@D@public#1 {

% #1 = FPI

% #1 = rest of \if test + YSTEM
\gdef\XML@D@system#1 {

% #1 = URI
  \XML@trace@warn{Doctype Public: \XML@PUBLIC}
  \XML@trace@warn{Doctype System: \XML@SYSTEM}

% #1 = rest of if test

% #1 = [ for local subset or > for the end.
    \XML@trace@warn{Internal Subset[}
    \edef\XML@w@{ \XML@w@}
      | it had better be the closing >


%% catalogue support

%% should rationalise this code

% #1 = FPI
% #2 = xmltex package file

% #1 = URI
% #2 = xmltex package file

% #1 = namespace URI
% #2 = xmltex package file
  \edef\@tempa{{\csname XML:#1\endcsname}}

% #1 = unprefixed element name
% #2 = xmltex package file

% #1 = unprefixed element name
% #2 = namespace URI
  \edef\@tempa{{#1}{\csname XML:#2\endcsname}}






    \XML@trace@warn{ \XML@this@element}


% entity refs








% predefined definitions

%% character refs

% longwinded way so can share code later, also need to do d case.
% this does up to x1F FFFF which is higher than needed for XML
% (x10 FFFF)

% definition is of form 
% \utfeightX <non active char>+
% except for non active chars below 128 which are just def of catcode 12 version.
  \uppercase{\count@\if x\noexpand#1"\else#1\fi#2}\relax
     \XML@utfeight@b C\utfeightb.,
     \XML@utfeight@b E\utfeightc.{,;}
     \XML@utfeight@b F\utfeightd.{!,;}

% while I support mixed tex/xml files I need to have a version
% of &#123; that always fetches the definition even if
% character is currently non active

  \uppercase{\count@\if x\noexpand#1"\else#1\fi#2}\relax
     \XML@utfeight@b C\utfeightb.,
     \XML@utfeight@b E\utfeightc.{,;}
     \XML@utfeight@b F\utfeightd.{!,;}




% #1 unicode slot, either 123 decimal or xA23 hex
% #2 tex definition of character when used as character data.
% code for chars below 127 somewhat experimental
% suppress active test in charref




  \edef\XML@next@level{ \XML@w@}




%% cdata

% #1 = DATA
\gdef\XML@cdata #1[{
  \catcode`\ \active

% #1 = CDATA section text


% #1 = CDATA section text

% #1 = CDATA section text

% notation

% parse past the public and system ids
% in case they contain a >.
% unlike entities PUBLIC need not have a system id
% #1 junk
% #2 notation name
\def\XML@dec@n#1N #2 #3 {
  \XML@trace@warn{NOTATION #2}


%% xmt definitions

%% #1 element, may use prefix : using prefixes in current scope
%% #2 attlist
%% #3 begin code
%% #4 end code
%% if #3 is \xmlgrab, #4 may access the content of the element as #1
% attlist
  \toks@\expandafter{\csname A:\csname
  \expandafter\gdef\csname E\XML@tempc\expandafter\endcsname
    \expandafter\gdef\csname E\string/\XML@tempc\endcsname##1
    \expandafter\gdef\csname E\string/\XML@tempc\endcsname


%% #1 attribute, may use prefix : using prefixes in current scope
%% #2 macro to access attribute in begin or end code
%% #3 default

      \csname XMLNS@\XML@this@prefix\endcsname

      \csname XMLNS@\XML@this@prefix\endcsname


% version for namespace global attributes, used at top level.
%% #1 prefix for namespace this is for.  using prefixes in current scope
%% #2 attribute, may use prefix : using prefixes in current scope
%% #3 macro to access attribute in begin or end code
%% #4 default

   \toks@\expandafter\expandafter\expandafter{\csname A:\csname
  \expandafter\xdef\csname A:\csname
   \toks@\expandafter\expandafter\expandafter{\csname A:\csname
  \expandafter\xdef\csname A:\csname

%% #1 QName, may use prefix : using prefixes in current scope
%% #2 macro to access attribute in begin or end code

      \csname XMLNS@\XML@this@prefix\endcsname





%% #1 name
%% #2 code, gets data as #1
  \expandafter\gdef\csname P:#1\endcsname##1{#2}}

%% xmltex format support
  \immediate\write20{xmltex version: \xmltexversion:}
\ActivateASCII{45}% -
  \endgroup}{\XML@warn{No File: xmltex.cfg}}
  \endgroup}{\XML@warn{No File: \jobname.cfg}}

%% not currently used
%  \let\XML@restore\XML@catcodes
%  \let\XML@setlatexcodes\relax

%% allow package and class loading with normal latex catcodes

  \catcode`\^^M5 %


% and similar for input of aux files
  \IfFileExists{#1}{\@@input\@filef@und}{\typeout{No file #1.}}

% and end doc
       \makeatletter \input\jobname.aux
     \ifdim \font@submax \string>\fontsubfuzz\relax
       \@font@warning{Size substitutions with differences\MessageBreak
                  up to \font@submax\space have occured.\@gobbletwo}
       \ifx \@multiplelabels \relax
           \@latex@warning@no@line{Label(s) may have changed.
               Rerun to get cross-references right}

%% protected write
\long\def \protected@write#1#2#3{

%% typeout etc

% xmltex namespace: number 2
% NOTE(review): unlike the null and xml namespaces (numbers 0 and 1), no
% \csname XML:<uri>\endcsname mapping for number 2 is visible here --
% confirm it is defined elsewhere.
\expandafter\let\csname A:2\endcsname\@empty
\expandafter\gdef\csname XMLNS@2\endcsname{2}

% this is some currently used code aiming at having an aux file
% in xml syntax using commands from the xmltex namespace.
% this would avoid some of the problems involved in having
% mixed xml/tex aux files.
  <2:contentsline level="#2">

      {\let\label\@gobble \let\index\@gobble \let\glossary\@gobble}
      {<2:@writefile ext="#1">#2</2:@writefile>}}


% end of xml-aux code.

% tracing



%% xmltex PI
%% <?xmltex tracingall ?>
%% <?xmltex typeout {hello world!} ?>
%% content of pi may be a latex command.
%% Arguments may be given in {} as usual
%% Do not use \ . First `word' (ignoring white space) taken as a tex
%% command name. May be a standard latex command, as here,
%% or some special command defined in a cfg file or package.

   \csname\zap@space#1 \@empty\expandafter\endcsname
      \XML@pi@b\csname\zap@space#1 \@empty\endcsname[#2]




%% unicode support

% \utfeighta#1         1 byte utf8 char
% \utfeightb#1#2       2 byte utf8 char
% \utfeightc#1#2#3     3 byte utf8 char
% \utfeightd#1#2#3#4   4 byte utf8 char

    \csname 8:\string##1\endcsname}
    \csname 8:##1\string##2\endcsname}
    \csname 8:##1\string##2\string##3\endcsname}
    \csname 8:##1\string##2\string##3\string##4\endcsname}}


% do this also in everyjob

% for moving internal arguments  (not writes)

% for external files (expands once in an edef and once in a write)

% for typeouts and immediate writes and messages
%  \let\utfeightax\noexpand
%  \let\utfeightay\noexpand
%  \let\utfeightaz\utfeighta@ref
%  \let<\relax\let&\relax
%  \def\utfeightb##1##2{##1\string##2}
%  \def\utfeightc##1##2##3{##1\string##2\string##3}
%  \def\utfeightd##1##2##3##4{##1\string##2\string##3\string##4}}

% plan b

% for csname (will fall over < or & but they should not be there)


%% mapping input encodings to unicode.

   \count@"0\if\noexpand x#1\relax\else\count@#1\fi\relax

% default encoding

%% need to change this default if hit BOM or xml or text decl,

% adding xmltex style protection to standard latex commands.
% if xmltex being used with other formats this does no harm
% except take up a bit of space.


     \let\label\relax \let\index\relax \let\glossary\relax
     \let\label\relax \let\index\relax \let\glossary\relax


% this one not safe, restore not complete

% should really save and restore, not always back to markup

%%%%% stop this doing damage, also stops it working


% support for making 7bit characters active

  \uppercase{\count@"0\if x\noexpand#1\relax\else\count@#1\fi\relax}

% some of these should not be active by default, but OK for now.
% could use \ActivateASCII for most if not all of these now.
% should probably use \UnicodeCharacter for these now

% \ActivateASCII would use the ax code; we want the ay code so that ^ is not
% written to aux files, where it would clash with tex usage. So locally lie
% and make it a letter.
\ActivateASCII{94}% ^ for tex ^^ notation in aux files

% The argument is a character code, decimal or hex with an x prefix.
\ActivateASCII{x5C}% \
\ActivateASCII{x5F}% _
\ActivateASCII{123}% {
\ActivateASCII{125}% }

% white space

% CR (13), space (32) and tab (9): each is declared with the same
% definition, a space followed by \ignorespaces.
\UnicodeCharacter{13}{ \ignorespaces}
\UnicodeCharacter{32}{ \ignorespaces}
\UnicodeCharacter{9}{ \ignorespaces}

\expandafter\def\csname 8:\string^^M\endcsname{\leavevmode\hfil \break\null}}

\expandafter\def\csname 8:\string^^M\endcsname}

\expandafter\def\csname 8: \endcsname{\nobreakspace}}

% tabs just do whatever the current space does.
\expandafter\def\csname 8:\string^^I\expandafter\endcsname
       \expandafter{\csname 8: \endcsname}

% tex conventions
  }{\XML@warn{No File: xmltex.cfg}}

% get xmltex in catcode 12, for comparing with \jobname

% if jobname is xmltex or pdfxmltex dump the format, otherwise
% try to load \jobname.cfg  and input \xmlfile
% put white space back so the filename can be read off the command line
\catcode`\ =10\relax
  }{\XML@warn{No File: \jobname.cfg}}


\endlinechar`\^^M \expandafter\XML@catcodes\XML@tempa