@q Copyright 2012-2024, Alexander Shibakov@>
@q This file is part of SPLinT@>

@q SPLinT is free software: you can redistribute it and/or modify@>
@q it under the terms of the GNU General Public License as published by@>
@q the Free Software Foundation, either version 3 of the License, or@>
@q (at your option) any later version.@>

@q SPLinT is distributed in the hope that it will be useful,@>
@q but WITHOUT ANY WARRANTY; without even the implied warranty of@>
@q MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the@>
@q GNU General Public License for more details.@>

@q You should have received a copy of the GNU General Public License@>
@q along with SPLinT.  If not, see <http://www.gnu.org/licenses/>.@>
\input limbo.sty
\def\optimization{5}
\input yy.sty
\input dcols.sty
\modenormal
\initauxstream

@** Parser file.
This is an example parser for expressions. It takes
advantage of some of the features of \splint\ generated parsers,
although anything that takes more than a straightforward setup is
omitted. 

The top-level structure of the input file holds no surprises and is
shown below.
\let\currentparsernamespace\parsernamespace
    \def\parsernamespace{[edisplay]}
    \def\hostparsernamespace{[edisplay]}
    \input etoks.sty
\let\parsernamespace\currentparsernamespace
\def\texnspace{[other]}% no pretty printing of \TeX
@s TeX_ TeX

@(expp.yy@>=
@G Switch to generic mode.
%{@> @<\.{expression} parser \Cee\ preamble@> @=%}
  @> @<Bison options@> @= 
%union {@> @<Union of parser types@> @=}
%{@> @<\.{expression} parser \Cee\ postamble@> @=%}
  @> @<Token and types ...@> @= 
%%
  @> @<Parser productions@> @= 
%%
@g

@ The \prodstyle{\%token-table} option is not merely a debugging help,
as it is in the case of the `real' \bison\ parsers, and cannot be
omitted.  The name table it is responsible for setting up is used as
a set of keys for various associative arrays. Token declarations are
parsed by a bootstrap parser during the \TeX\ processing stage to
establish equivalences between the names kept in |yytname| and the
macro names used internally by the parsers built by \bison. The reason
this is necessary is not very complicated: either version of the token
name can be used in the grammar while the `driver' program
(\.{mkeparser.c}) only has access to the names in |yytname|. In
general, this is important whenever the grammar uses a different set of
token names from the lexer or when diagnostic messages are output. An
important case is the symbolic name switch: before the rules can be
listed to create the switch, the token numerical values must be
known. If the parser is only aware of the |yytname| listed names and the
grammar being parsed uses the `internal' names, the listing macros
will fail.  The array |yytname| is used in a few functions inside the
`driver', as well, so omitting this option would make building the
parser impossible.
@<Bison options@>=
@G
%token-table
%debug
%start value
@g

@ To continue the token name discussion, this parser uses internal
names only so this is what will appear in the |yytname| array. No
bootstrapping is necessary. The typesetting of the tokens can be
adjusted using \.{\\prettywordpair} macros (see the included
\.{etoks.sty} file for examples).
@<Token and types declarations@>=
@G
%token IDENTIFIER
%token INTEGER
%token BOGUS
@g

@ Here is the whole grammar: simple arithmetic expressions built from
sums and products, with two levels of precedence.
@<Parser productions@>=
@G
value:
  expression                      {@> TeX_( "/yy0{/the/yy(1)}" ); @=}
;

expression:
  term                            {@> TeX_( "/yy0{/the/yy(1)}" ); @=}
| expression '+' term             {@> @<Add a term@> @=}
;

term:
  atom                            {@> TeX_( "/yy0{/the/yy(1)}" ); @=}
| term '*' atom                   {@> @<Multiply by an atom@> @=}
;

atom:
  IDENTIFIER                      {@> @<Assign variable value to an atom@> @=}
| INTEGER                         {@> @<Assign value to an atom@> @=}
| '(' expression ')'              {@> TeX_( "/yy0{/the/yy(2)}" ); @=}
;
@g

@ Addition. The values of the two operands are assigned to the
registers \.{\\tempca} and \.{\\tempcb}, the second is added to the
first with \.{\\advance}, and the result becomes the value of the
expression.
@<Add a term@>=
  @[TeX_( "/tempca/the/yy(1)/relax" );@]@;
  @[TeX_( "/tempcb/the/yy(3)/relax" );@]@;
  @[TeX_( "/advance/tempca by /tempcb" );@]@;
  @[TeX_( "/yy0{/the/tempca}" );@]@;

@ Multiplication. The values of the two operands are assigned to the
registers \.{\\tempca} and \.{\\tempcb}, the first is multiplied by
the second with \.{\\multiply}, and the result becomes the value of
the term.
@<Multiply by an atom@>=
  @[TeX_( "/tempca/the/yy(1)/relax" );@]@;
  @[TeX_( "/tempcb/the/yy(3)/relax" );@]@;
  @[TeX_( "/multiply/tempca by /tempcb" );@]@;
  @[TeX_( "/yy0{/the/tempca}" );@]@;

@ Variables. A component of the value of the \.{IDENTIFIER} token is
extracted into \.{\\toksa}, the control sequence named by it is formed
with \.{\\csname} and converted by \.{\\number}, and the result, collected
in \.{\\toksb}, becomes the value of the atom.
% NOTE(review): this section uses \getsecond while the integer case uses \getfirst; confirm that the difference is intended
@<Assign variable value to an atom@>=
  @[TeX_( "/getsecond{/yy(1)}/to/toksa" );@]@;
  @[TeX_( "/toksb/expandafter/expandafter/expandafter{/expandafter" );@]@;
  @[TeX_( "    /number/csname/the/toksa/endcsname}" );@]@;
  @[TeX_( "/yy0{/the/toksb}" );@]@;

@ Integers. The first component of the value of the \.{INTEGER} token
is extracted into \.{\\toksa} and becomes the value of the atom
unchanged.
@<Assign value to an atom@>=
  @[TeX_( "/getfirst{/yy(1)}/to/toksa" );@]@;
  @[TeX_( "/yy0{/the/toksa}" );@]@;

@ \Cee\ preamble. In this case, there are no `real' actions that our
grammar performs, only \TeX\ output, so this section is empty.

@<\.{expression} parser \Cee\ preamble@>=

@ \Cee\ postamble. It is tricky to insert function definitions that use \bison's internal types,
as they have to be inserted in a place that is aware of the internal definitions but before said 
definitions are used. No such functions are needed here, so this section is also left empty.

@<\.{expression} parser \Cee\ postamble@>=

@ Union of types. Empty as well.

@<Union of parser types@>=

@** The lexer file. The scanner for the grammar above is even
simpler. Identifiers are interpreted as variable names that expand to
appropriate values.
%\checktabletrue
@(expl.ll@>=
@G
 @> @<Lexer definitions@> @= 
%{@> @<Lexer \Cee\ preamble@> @=%}
  @> @<Lexer options@> @= 
%%
  @> @<Regular expressions@> @= 
%%
@g

@ Named patterns used by the rules below. A letter is an underscore or
an unaccented Latin letter of either case, an identifier is a letter
followed by any number of letters, digits, or hyphens, and an integer is
a nonempty string of digits.
@<Lexer definitions@>=
@G(fs1)
letter    [_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]
id        {letter}({letter}|[-0-9])*
int       [0-9]+
@g

@ The preamble of the lexer includes the standard headers for the
fixed width integer and boolean types and provides an empty definition
of |define_all_states|.
% NOTE(review): presumably the body is empty because this lexer declares no start states; confirm against the driver
@<Lexer \Cee\ preamble@>=

#include <stdint.h>
#include <stdbool.h>

  void define_all_states( void ){}

@ Options of the lexer. The scanner is reentrant, uses the \bison\
bridge, has debugging output and the start condition stack enabled, and
is written to the file \.{expl.c}.
@<Lexer options@>=
@G(fs1)
%option bison-bridge
%option noyywrap nounput noinput reentrant 
%option noyy_top_state
%option debug
%option stack
%option outfile="expl.c"
@g

@ The rules of the lexer: the white space rule comes first and is
followed by the rules for the tokens.
@<Regular expressions@>=
  @<Scan white space@>@;
  @<Scan identifiers@>@;

@ White space skipping. 
@<Scan white space@>=
@G(fs2)
[ \f\n\t\v]                        {@> @[TeX_( "/yylexnext" );@]@=}
@g

@ Tokens. Identifiers and integers are returned as the \.{IDENTIFIER}
and \.{INTEGER} tokens by \.{\\yylexreturnval}, the two operators and the
parentheses are handled by \.{\\yylexreturnchar}, and any other
character is treated as a bad one.
@<Scan identifiers@>=
@G(fs2)
{id}                               {@> @[TeX_( "/yylexreturnval{IDENTIFIER}" );@]@=}
{int}                              {@> @[TeX_( "/yylexreturnval{INTEGER}" );@]@=}
[+*()]                             {@> @[TeX_( "/yylexreturnchar" );@]@=}
.                                  {@> @[@<React to a bad character@>@]@=}
@g

@ Bad characters. If \.{\\iftracebadchars} is true, a complaint that
quotes the offending text is issued with \.{\\yycomplain}; whether or
not the complaint is made, \.{\\yyerrterminate} is executed afterwards.
@<React to a bad character@>=
 @[TeX_( "/iftracebadchars" );@]@;
 @[TeX_( "    /yycomplain{invalid character(s): /the/yytext}" );@]@;
 @[TeX_( "/fi" );@]@;
 @[TeX_( "/yyerrterminate" );@]@;

@*Test file. The test file includes a handy list of debugging options
that can be activated to see the inner workings of the parser and
scanner routines.
@(test.txx@>=
@G
\chardef\other=12 % needed for some macros to work
\input expression.sty

% tracing switches, all skipped because of the false conditional
\iffalse
    \tracedfatrue
    \traceparserstatestrue
    \tracestackstrue
    \tracerulestrue
    \traceactionstrue
    \tracelookaheadtrue
    \traceparseresultstrue
    \tracebadcharstrue
    \yyflexdebugtrue
    \yyinputdebugtrue
\fi

% a variable and the expression that refers to it by name
\def\varone{10}
\def\expression{1 + 3 * ( 5 + 7 ) + varone}
% the expression macro is expanded once before the parser is started
\basicparserinit\expandafter\yyparse \expression \yyeof\yyeof\endparseinput\endparse

% report the expression and the computed value on the terminal and in the log
{
   \newlinechar`^^J
   \immediate\write16{^^Jexpression: \expression^^Jthe value: \the\yyval^^J^^J}
}

\bye
@g

@**Index.\global\let\secrangedisplay\empty% do not show the current section range anymore
\global\topskip=9pt
\def\Tex{\TeX\ output}
\def\TeXx{\TeX\ output}