%\VignetteEngine{knitr::knitr}
%\VignetteIndexEntry{Workflow for Metabolomics}
%\VignetteKeywords{Mass Spectrometry, MS, MSMS, Metabolomics, Visualization}
%\VignettePackage{MetNet-vignette}

\documentclass[11pt,a4paper,english,arial,twoside]{article}

<<style-knitr, eval=TRUE, echo=FALSE, results="asis">>=
## output of BiocStyle::latex() is inserted as-is into the preamble
## (chunk option results="asis"); the chunk itself is not echoed
BiocStyle::latex()
@    


% 
\usepackage{caption}
\usepackage{subcaption}
% \usepackage{geometry}
% \geometry{verbose,
%     tmargin=25mm,
%     bmargin=25mm,
%     lmargin=25mm,
%     rmargin=25mm}
% \setlength\parindent{0pt}

\usepackage{amsmath,amsfonts,amssymb,amsthm}
\usepackage{mathtools}
\usepackage{textcomp}
\usepackage{longtable}
\usepackage{cite}
%\definecolor{red}{rgb}{1,0,0}
%\definecolor{blue}{rgb}{0,0,1}

%\usepackage{breakurl}
%\usepackage{hyperref}
% \hypersetup{%
%     pdfusetitle,
%   bookmarks = {true},
%     bookmarksnumbered = {true},
%     bookmarksopen = {true},
%     bookmarksopenlevel = 2,
%     unicode = {true},
%     breaklinks = {false},
%     hyperindex = {true},
%     colorlinks = {true},
%     linktocpage = {true},
%     plainpages = {false},
%     linkcolor = {blue},
%     citecolor = {blue},
%     urlcolor = {red},
%     pdfstartview = {Fit},
%     pdfpagemode = {UseOutlines},
%     pdfview = {XYZ null null null}
% }

\widowpenalty100000
\clubpenalty100000


% \newcommand\numberthis{\addtocounter{equation}{1}\tag{\theequation}}

%\usepackage[nottoc]{tocbibind}

% \usepackage[utf8]{inputenc}
% \usepackage{fancyhdr}

%\usepackage{graphicx}
%\usepackage[font=footnotesize]{subfig}

\usepackage[english]{babel}
\usepackage{color}

%\usepackage[backend=bibtex,natbib,style=authoryear,maxcitenames=2]{biblatex}
% \usepackage[backend=bibtex]{biblatex}
% \addbibresource{MetNet-citations.bib}

% \usepackage{setspace}
% \onehalfspacing

\author{Thomas Naake\thanks{\email{thomasnaake@googlemail.com}}\\
    Max Planck Institute of Molecular Plant Physiology 
    14476 Potsdam-Golm, Germany 
}


\bioctitle[MetNet: Inferring metabolic networks from untargeted 
high-resolution MS data]{MetNet: Inferring metabolic networks from untargeted 
high-resolution mass spectrometry data}
    

\begin{document}


\maketitle

\abstract{
A major bottleneck of mass spectrometry-based metabolomic analysis is 
still the rapid detection and annotation of unknown m/z features across 
biological matrices. Traditionally, the annotation was done manually 
imposing constraints in reproducibility and automatization. 
Furthermore, different analysis tools are typically used at different steps of 
analyses which requires parsing of data and changing of environments.
I present here \Rpackage{MetNet}, a novel \R{} package, that is compatible 
with the output of the \Biocpkg{xcms}/\Biocpkg{CAMERA} suite and that uses the 
data-rich output of mass spectrometry metabolomics to putatively 
link features on their relation to other features in the data set. 
\Rpackage{MetNet} uses both structural and quantitative information of 
metabolomics data for network inference that will guide metabolite annotation.

\packageVersion{\Sexpr{BiocStyle::pkg_ver("MetNet")}}
}

\section{Introduction}
Among the main challenges in mass spectrometric metabolomic analysis is the 
high-throughput analysis of metabolic features, their fast detection and 
annotation.
In contrast to the screening of known, previously characterized
metabolic features in these data, the putative annotation of unknown
features is often cumbersome and requires a lot of manual work, hindering
the retrieval of biological information from these data.
High-resolution mass spectrometric data is often very rich in information
content, and metabolic conversions and reactions can be derived from structural
properties of features~\cite{Breitling2006}.
In addition to that, statistical associations between
features (based on their intensity values) can be a valuable resource to find 
co-synthesised or co-regulated metabolites, which are synthesised in the same
biosynthetic pathways. Given that an analysis tool within the \R{}~framework
that integrates the two types of mass spectrometric information commonly
acquired with mass spectrometers (m/z and intensity values) is still lacking,
I developed \Rpackage{MetNet} to close this gap.
The \Rpackage{MetNet} package comprises functionalities to infer network
topologies from high-resolution mass spectrometry data. \Rpackage{MetNet}
combines information from both structural data (differences in m/z values
of features) and statistical associations (intensity values of features per
sample) to propose putative metabolic networks that can be used for further
exploration. \newline

The idea of using high-resolution mass spectrometry data for network 
construction was first proposed in \cite{Breitling2006} and followed soon 
afterwards by a Cytoscape plugin, MetaNetter \cite{Jourdan2007}, that 
is based on the inference of metabolic networks on molecular weight differences
and correlation (Pearson correlation and partial correlation). 

Inspired by the paper of \cite{Marbach2012}, different algorithms for network 
inference were implemented in \Rpackage{MetNet} to account for 
biases that are inherent in these statistical methods, followed by the 
calculation of a consensus adjacency matrix using the differently computed 
individual adjacency matrices. \newline

The two main functionalities of the package include the creation of an
adjacency matrix from structural properties, based on losses/addition of
functional groups defined by the user, and statistical associations. Currently,
the following statistical models are implemented to infer a statistical
adjacency matrix: Least absolute shrinkage and selection operator
(LASSO, L1-norm regression, \cite{Tibshirani1994}), Random Forest 
\cite{Breiman2001}, Pearson and Spearman correlation (including partial and
semipartial correlation, see \cite{Steuer2006} 
for a discussion on correlation-based metabolic networks), context likelihood 
of relatedness (CLR, \cite{Faith2007}), the algorithm for the reconstruction 
of accurate cellular networks (ARACNE, \cite{Margolin2006}) and 
constraint-based structure learning (Bayes, \cite{Scutari2010}). 
Since all of these methods have
advantages and disadvantages, the user has the possibility to select
several of these methods, compute adjacency matrices from these models and
create a consensus matrix from the different statistical frameworks. \newline

After creating the statistical and structural adjacency matrices, these two 
matrices can be combined to form a consensus matrix that contains information 
from both the structural and statistical properties of the data. This can be followed 
by further network analyses (e.g. calculation of topological parameters),
integration with other data sources (e.g. genomic information or
transcriptomic data) and/or visualization. \newline

\Rpackage{MetNet} is currently under active development. If you
discover any bugs, typos or develop ideas of improving
\Rpackage{MetNet} feel free to raise an issue via
\href{https://github.com/tnaake/MetNet}{GitHub} or
send a mail to the developer.

<<knitr, include=FALSE, cache=FALSE>>=
## attach knitr; this chunk is hidden from the vignette (include=FALSE)
library("knitr")
@

\section{Prepare the environment and load the data}
To install \Rpackage{MetNet} enter the following to the \R{}~console
<<install,eval=FALSE>>=
## install BiocManager from CRAN only if it is not available yet, then
## use it to install MetNet from Bioconductor
if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
BiocManager::install("MetNet")
@

Before starting with the analysis, load the \Rpackage{MetNet} package. This 
will also load the required packages \CRANpkg{glmnet}, \CRANpkg{stabs},
\CRANpkg{randomForest}, \CRANpkg{rfPermute}, \CRANpkg{mpmi},
\CRANpkg{parmigene}, \CRANpkg{WGCNA} and \CRANpkg{bnlearn} that are needed
for functions in the statistical adjacency matrix inference.
<<load_MetNet,eval=TRUE>>=
## attach the MetNet package
library(MetNet)
@

The data format that is compatible with the \Rpackage{MetNet} framework is
in the \Biocpkg{xcms}/\Biocpkg{CAMERA} output-like $m \times n$ matrix, where
columns denote the different samples $n$ and where $m$ features are present.
In such a matrix, information about the masses of the features and quantitative
information of the features (intensity or concentration values) are needed. 
The information about the m/z values has to be stored in a vector of
length $\vert m \vert$ in the column \Rcode{"mz"}. \newline

\Rpackage{MetNet} does not impose any requirements for
data normalization, filtering, etc. However, the user has to make sure that
the data is properly preprocessed. These include division by internal standard,
\Rcode{log2} transformation, noise filtering, removal of features that do not 
represent mass features/metabolites, removal of isotopes, etc. \newline

We will load here the object \Robject{x\_test} that contains m/z values
(in the column \Rcode{"mz"}), together with the corresponding retention time
(in the column \Rcode{"rt"}) and intensity values. We will use here the object
\Robject{x\_test} for guidance through the workflow of \Rpackage{MetNet}.

<<data,eval=TRUE,echo=TRUE>>=
## load the example data set shipped with MetNet and coerce it to a matrix
data("x_test", package = "MetNet")
x_test <- as.matrix(x_test)
@

\section{Creating the structural matrix}

The function \Rfunction{createStructuralAdjacency} will create the adjacency
matrix based on structural properties (m/z values) of the features.
The function expects a matrix with a column \Rcode{"mz"} that contains the 
mass information of a feature (typically the m/z value). Furthermore,
\Rfunction{createStructuralAdjacency} takes a \Robject{data.frame}
object as argument \Robject{transformation} with the \Rcode{colnames}
\Rcode{"group"}, \Rcode{"mass"} and additional columns (e.g. \Rcode{"formula"}). 
\Rfunction{createStructuralAdjacency} looks for transformations (in the 
sense of additions/losses of functional groups mediated by biochemical,
enzymatic reactions) in the data using the mass information. \newline

Following the work of \cite{Breitling2006} and \cite{Jourdan2007}, the
molecular weight difference $w_X$ is defined by 
\begin{equation}
    w_X = \vert w_A - w_B \vert 
\end{equation}
where $w_A$ is the molecular weight
of substrate A, and $w_B$ is the molecular weight of product B 
(typically, m/z values will be used as a proxy for the molecular weight since 
the molecular weight is not directly derivable from mass spectrometric data). 
As exemplified in \cite{Jourdan2007}, specific enzymatic reactions refer to 
specific changes in the molecular weight, e.g. carboxylation reactions 
will result in a mass difference of 43.98983 (molecular weight of CO$_2$) 
between metabolic features. 

The search space for these transformations is adjustable by the 
\Robject{transformation} argument in 
\Rfunction{createStructuralAdjacency}, which allows the user to look for the 
specific enzymatic transformations they have in mind. Hereby,
\Rfunction{createStructuralAdjacency} will take into account the 
\Robject{ppm} value to adjust for inaccuracies in m/z values due to technical 
reasons according to the formula

\begin{equation}
    \mathrm{ppm} = \frac{m_{exp} - m_{calc}}{m_{exp}} \cdot 10^{6}
\end{equation}

with $m_{exp}$ the experimentally determined m/z value and $m_{calc}$ the
calculated accurate mass of a molecule. Within the function, a lower and upper
range is calculated depending on the supplied \Robject{ppm} value, differences
between the m/z feature values are calculated and matched against the values
in the \Rcode{"mass"} column of the \Robject{transformation} argument. If any
of the additions/losses defined in \Robject{transformation} is found in the
data, it will be reported as an (unweighted) connection in the
returned adjacency matrix. Together with the adjacency matrix, the type of
connection (derived from the column \Rcode{"group"} in
\Robject{transformation}) will be written to a character matrix. These
two matrices will be returned as a list (first entry: numerical adjacency
matrix, second entry: character matrix) by the function
\Rfunction{createStructuralAdjacency}. \newline

Before calculating the structural matrix, one must define the search space, 
i.e.\ the transformations that will be looked for in the mass spectrometric
data, by creating the \Robject{transformations} object. 

<<transformation_example,echo=TRUE,eval=TRUE>>=
## define the search space for biochemical transformations: one row per
## transformation with its name, sum formula, mass difference and the
## expected retention time shift of the feature with the higher m/z value
## ("-": lower, "+": higher, "?": unknown retention time)
transformations <- rbind(
    c("Hydroxylation (-H)", "O", 15.9949146221, "-"),
    c("Malonyl group (-H2O)", "C3H2O3", 86.0003939305, "?"),
    c("C6H10O6", "C6H10O6", 178.0477380536, "-"),
    c("D-ribose (-H2O) (ribosylation)", "C5H8O4", 132.0422587452, "-"),
    c("Disaccharide (-H2O)", "C12H20O11", 340.1005614851, "-"),
    c("Glucuronic acid (-H2O)", "C6H8O6", 176.0320879894, "?"),
    c("Monosaccharide (-H2O)", "C6H10O5", 162.0528234315, "-"),
    c("Rhamnose (-H2O)", "C6H10O4", 146.057910, "-"),
    c("Trisaccharide (-H2O)", "C18H30O15", 486.1584702945, "-"),
    c("coumaroyl (-H2O)", "C9H6O2", 146.0367794368, "?"),
    c("feruloyl (-H2O)", "C9H6O2OCH2", 176.0473441231, "?"),
    c("sinapoyl (-H2O)", "C9H6O2OCH2OCH2", 206.0579088094, "?"),
    c("putrescine to spermidine (+C3H7N)", "C3H7N", 57.0578492299, "?"))

## convert to data frame; rbind() of the mixed vectors yields a character
## matrix, so the masses have to be converted back to numeric
transformations <- data.frame(group=transformations[,1],
                            formula=transformations[,2],
                            mass=as.numeric(transformations[,3]),
                            rt=transformations[,4])
@

The function \Rfunction{createStructuralAdjacency} will then check for those 
m/z differences that are stored in the column \Rcode{"mass"} in the 
object \Robject{transformations}. To create the adjacency matrix derived 
from these structural information we enter

<<structure,eval=TRUE,echo=TRUE>>=
## infer the structural adjacency matrix, allowing a deviation of 10 ppm
struct_adj <- createStructuralAdjacency(
    x = x_test, transformation = transformations, ppm = 10)
@
in the \R~console. 

\subsection*{Refining the structural adjacency matrix (optional)}
The retention time will differ depending on the chemical group added,
e.g.\ an addition of a glycosyl group will 
usually result in a lower retention time in reverse-phase chromatography.
This information can be used in refining the adjacency matrix derived from 
the structural matrix. The function \Rfunction{rtCorrection} checks, in an
automated fashion, whether the predicted transformations correspond to the
expected retention time shift. It requires information about the expected retention
time shift in the \Robject{data.frame} passed to the \Robject{transformation}
argument (in the \Rcode{"rt"} column). Within this column, information about
retention time shifts is encoded by \Rcode{"-"}, \Rcode{"+"} and \Rcode{"?"}, 
which means the feature with higher m/z value has lower, higher or unknown 
retention time than the feature with the lower m/z value. The values for 
m/z and retention time will be taken from the object passed to the 
\Robject{x} argument. In case there is a discrepancy between the transformation
and the retention time shift the adjacency matrix at the specific position
will be set to 0. \Rfunction{rtCorrection} will return the 
updated adjacency matrix and the updated character matrix with the descriptions 
of the transformation. 

To account for retention time shifts we enter

<<rt_correction,eval=TRUE,echo=TRUE>>=
## drop connections that contradict the expected retention time shift
struct_adj <- rtCorrection(
    struct_adj = struct_adj, x = x_test, transformation = transformations)
@
in the \R~console. 


\section{Creating the statistical matrix}

The function \Rfunction{createStatisticalAdjacency} will create the adjacency
matrix based on statistical associations. 
\Rfunction{createStatisticalAdjacency} is a wrapper function for the 
functions \Rfunction{createStatisticalAdjacencyList} and 
\Rfunction{consensusAdjacency}. The former function will create a 
list of adjacency matrices using the statistical models defined by the 
\Robject{model} argument. Currently, the following models are implemented: 
LASSO (using \CRANpkg{stabs}, \cite{Hofner2015,Thomas2017}), 
Random Forest (using \CRANpkg{rfPermute}), 
CLR, ARACNE (the two latter using the package \CRANpkg{mpmi} to calculate
Mutual Information using a nonparametric bias correction by 
Bias Corrected Mutual Information, and the functions \Rfunction{clr} and 
\Rfunction{aracne.a} from the \CRANpkg{parmigene} package), Pearson and 
Spearman correlation (based on the 
\CRANpkg{WGCNA} package, \cite{Langfelder2008}), partial and semipartial 
Pearson and Spearman correlation (using the \CRANpkg{ppcor} package) and 
constraint-based structure 
learning based on the Fast Incremental Association algorithm (Fast-IAMB, 
from the \CRANpkg{bnlearn} package, \cite{Scutari2010}). 

For further information on the different models 
take a look at the respective help pages of \Rcode{lasso}, 
\Rcode{randomForest}, \Rcode{clr}, \Rcode{aracne}, \Rcode{correlation} and/or 
\Rcode{bayes}. Arguments that are accepted by the respective underlying 
functions can be passed directly to the \Rfunction{createStatisticalAdjacency} 
and \Rfunction{createStatisticalAdjacencyList} functions. In addition, 
arguments that are defined in the functions \Rcode{lasso}, 
\Rcode{randomForest}, \Rcode{clr}, \Rcode{aracne}, \Rcode{correlation} and/or 
\Rcode{bayes} can be passed to the functions. \newline

From the list of adjacency matrices the function \Rfunction{consensusAdjacency}
will create a consensus adjacency matrix using the employed statistical models.
The reasoning behind this step is to circumvent disadvantages arising from each 
model and creating a statistically reliable topology that reflects the actual 
metabolic relations. To calculate the consensus adjacency matrix, the 
\Rfunction{consensus} function from the \CRANpkg{sna} package \cite{Butts2016} is 
employed. The arguments that are accepted by this function can be passed to the 
\Rfunction{consensusAdjacency} and \Rfunction{createStatisticalAdjacency} 
function, respectively. Furthermore, in case a method other than 
\Rcode{"central.graph"} is used the argument \Robject{threshold} will define if 
the value $a_{i,j}$ of the consensus adjacency matrix 
will be reported as a connection in the returned matrix 
(if $a_{i,j} \geq$ \Rcode{threshold})
or not. \Rfunction{createStatisticalAdjacency} and 
\Rfunction{consensusAdjacency} will return an unweighted adjacency matrix 
with connections inferred from the respective models. \newline

In the following example, we will create a consensus adjacency matrix using 
Pearson and Spearman correlation using the intensity values as input data. 
The p-values that will be used for assigning edges in the unweighted 
adjacency matrix will be adjusted by the Benjamini \& Hochberg (False Discovery 
rate) method and the default q-value of 0.05. 
<<statistical,eval=TRUE,echo=TRUE>>=
## keep the columns used as intensity values (third to last column of x_test)
x_int <- x_test[, 3:ncol(x_test)]
## consensus of Pearson and Spearman correlation, p-values adjusted by "BH"
stat_adj <- createStatisticalAdjacency(
    x_int, model = c("pearson", "spearman"), correlation_adjust = "BH")
@

To create the same adjacency matrix without using the wrapper function, one can
call the two functions \Rfunction{createStatisticalAdjacencyList} and 
\Rfunction{consensusAdjacency} individually: 
<<statistical_stepwise,eval=TRUE,echo=TRUE>>= 
## step 1: list of adjacency matrices for the selected statistical models
l <- createStatisticalAdjacencyList(
    x_int, model = c("pearson", "spearman"), correlation_adjust = "BH")
## step 2: consensus of the individual adjacency matrices
stat_adj <- consensusAdjacency(l = l)
@

\section{Combining the structural and statistical matrix}
After creating the structural and statistical matrix, it is time to combine 
these two matrices. The function \Rfunction{combineStructuralStatistical} 
will combine the matrices to the consensus matrix. The function accepts 
the arguments \Rcode{structure} and \Rcode{statistical} for the two matrices,
respectively, and the argument \Rcode{threshold}, that is a numerical value 
(default=1). After adding the matrices, it will be checked whether the entries 
are greater than or equal to \Rcode{threshold}; if so, 1 will be returned, 
otherwise 0. The argument \Rcode{threshold} needs to be adjusted by the user 
if another \Rcode{method} than \Rcode{"central.graph"} in 
\Rfunction{createStatisticalAdjacency}/\Rfunction{consensusAdjacency} is 
used. 

<<combine,eval=TRUE,echo=TRUE>>=
## combine the first entry of struct_adj (the adjacency matrix itself)
## with the statistical adjacency matrix
cons_adj <- combineStructuralStatistical(
    structure = struct_adj[[1]], statistical = stat_adj)
@

\section{Visualization and further analyses}
To display the created consensus adjacency matrix, existing visualisation 
tools available in the \R~framework can be employed or any other visualisation 
tool after exporting the consensus matrix as a text file. In this example, 
we will use the \CRANpkg{igraph} \cite{Csardi2006} package to visualize the 
adjacency matrix. 

<<visualisation,eval=TRUE,echo=TRUE,fig.show='hide'>>=
## build an undirected graph from the consensus adjacency matrix and plot it
g <- igraph::graph_from_adjacency_matrix(cons_adj, mode = "undirected")
plot(g, edge.width = 5, vertex.label.cex = 0.5, edge.color = "grey")
@

\begin{figure}[t!]
    \centering
    \includegraphics{./figure/visualisation-1}
    \caption{\textbf{\textit{Ab initio} network inferred from structural 
    and quantitative mass spectrometry data.} Vertices are connected 
    that are separated by a given metabolic transformation and a statistical 
    association.}
\end{figure}

Furthermore, the network can be analysed by network analysis techniques
(topological parameters such as centrality, degree, clustering indices) that 
are implemented in different packages in \R{} 
(e.g. \CRANpkg{igraph} or \CRANpkg{sna}) or other software tools outside of 
the \R~environment. 

\newpage
\newpage

\bibliography{MetNet-citations}

\newpage 
\section*{Appendix}

\subsection*{Session information}

All software and respective versions to build this vignette are listed here:
<<session,eval=TRUE,echo=FALSE>>=
## print the session information; the call itself is not echoed (echo=FALSE)
sessionInfo()
@

\newpage

\subsection*{Transformations}

The list of transformations is taken from \cite{Breitling2006}. 
The numerical m/z values were calculated by using the structural formula and 
the Biological Magnetic Resonance Data Bank
\href{http://www.bmrb.wisc.edu/metabolomics/mol_mass.php}{web tool}. 
<<transformations,eval=TRUE,echo=TRUE>>=
## list of transformations: one row per transformation with its name,
## sum formula and mass (stored as character in the matrix and converted
## to numeric when the data frame is created below)
transformations <- rbind(
    c("Alanine", "C3H5NO", "71.0371137878"),
    c("Arginine", "C6H12N4O", "156.1011110281"),
    c("Asparagine", "C4H6N2O2", "114.0429274472"),
    c("Guanosine 5'-diphosphate (-H2O)", "C10H13N5O10P2", "425.0137646843"),
    c("Guanosine 5'-monophosphate (-H2O)", "C10H12N5O7P", "345.0474342759"),
    c("Guanine (-H)", "C5H4N5O", "150.0415847765"),
    c("Aspartic acid", "C4H5NO3", "115.0269430320"),
    c("Guanosine (-H2O)", "C10H11N5O4", "265.0811038675"),
    c("Cysteine", "C3H5NOS", "103.0091844778"),
    c("Deoxythymidine 5'-diphosphate (-H2O)", "C10H14N2O10P2", "384.01236770"),
    c("Cystine", "C6H10N2O3S2", "222.0132835777"),
    c("Thymidine (-H2O)", "C10H12N2O4", "224.0797068840"),
    c("Glutamic acid", "C5H7NO3", "129.0425930962"),
    c("Thymine (-H)", "C5H5N2O2", "125.0351024151"),
    c("Glutamine", "C5H8N2O2", "128.0585775114"),
    c("Thymidine 5'-monophosphate (-H2O)", "C10H13N2O7P", "304.0460372924"),
    c("Glycine", "C2H3NO", "57.0214637236"),
    c("Uridine 5'-diphosphate (-H2O)", "C9H12N2O11P2", "385.9916322587"),
    c("Histidine", "C6H7N3O", "137.0589118624"),
    c("Uridine 5'-monophosphate (-H2O)", "C9H11N2O8P", "306.0253018503"),
    c("Isoleucine", "C6H11NO", "113.0840639804"),
    c("Uracil (-H)", "C4H3N2O2", "111.0194523509"),
    c("Leucine", "C6H11NO", "113.0840639804"),
    c("Uridine (-H2O)", "C9H10N2O5", "226.0589714419"),
    c("Lysine", "C6H12N2O", "128.0949630177"),
    c("Acetylation (-H)", "C2H3O2", "59.0133043405"),
    c("Methionine", "C5H9NOS", "131.0404846062"),
    c("Acetylation (-H2O)", "C2H2O",  "42.0105646863"),
    c("Phenylalanine", "C9H9NO",  "147.0684139162"),
    c("C2H2", "C2H2", "26.0156500642"),
    c("Proline", "C5H7NO", "97.0527638520"),
    c("Carboxylation", "CO2", "43.9898292442"),
    c("Serine", "C3H5NO2", "87.0320284099"),
    c("CHO2", "CHO2", "44.9976542763"),
    c("Threonine",  "C4H7NO2",  "101.0476784741"),
    c("Condensation/dehydration", "H2O", "18.0105646863"),
    c("Tryptophan", "C11H10N2O",  "186.0793129535"),
    c("Diphosphate", "H3O6P2", "160.9404858489"),
    c("Tyrosine", "C9H9NO2", "163.0633285383"),
    c("Ethyl addition (-H2O)", "C2H4", "28.0313001284"),
    c("Valine", "C5H9NO",  "99.0684139162"),
    c("Formic Acid (-H2O)", "CO", "27.9949146221"),
    c("Acetoacetate (-H2O)",  "C4H4O2", "84.0211293726"),
    c("Glyoxylate (-H2O)", "C2O2",  "55.9898292442"),
    c("Acetone (-H)", "C3H5O", "57.0340397826"),
    c("Hydrogenation/dehydrogenation", "H2", "2.0156500642"),
    c("Adenylate (-H2O)", "C10H12N5O6P", "329.0525196538"),
    c("Hydroxylation (-H)", "O", "15.9949146221"),
    c("Biotinyl (-H)", "C10H15N2O3S", "243.0803380482"),
    c("Inorganic phosphate", "P", "30.9737615100"),
    c("Biotinyl (-H2O)", "C10H14N2O2S", "226.0775983940"),
    c("Ketol group (-H2O)", "C2H2O", "42.0105646863"),
    c("Carbamoyl P transfer (-H2PO4)", "CH2ON", "44.0136386915"),
    c("Methanol (-H2O)", "CH2", "14.0156500642"),
    c("Co-enzyme A (-H)", "C21H34N7O16P3S", "765.0995583014"),
    c("Phosphate", "HPO3", "79.9663304084"),
    c("Co-enzyme A (-H2O)", "C21H33N7O15P3S", "748.0968186472"),
    c("Primary amine", "NH2", "16.0187240694"),
    c("Glutathione (-H2O)", "C10H15N3O5S", "289.0732412976"),
    c("Pyrophosphate", "PP", "61.9475230200"),
    c("Isoprene addition (-H)", "C5H7", "67.0547752247"),
    c("Secondary amine", "NH", "15.0108990373"),
    c("Malonyl group (-H2O)", "C3H2O3", "86.0003939305"),
    c("Sulfate (-H2O)", "SO3", "79.9568145563"),
    c("Palmitoylation (-H2O)", "C16H30O", "238.2296655851"),
    c("Tertiary amine", "N", "14.0030740052"),
    c("Pyridoxal phosphate (-H2O)", "C8H8NO5P", "229.0140088825"),
    c("C6H10O5", "C6H10O5", "162.0528234315"),
    c("Urea addition (-H)", "CH3N2O", "59.0245377288"),
    c("C6H10O6", "C6H10O6", "178.0477380536"),
    c("Adenine (-H)", "C5H4N5", "134.0466701544"),
    c("D-ribose (-H2O) (ribosylation)", "C5H8O4", "132.0422587452"),
    c("Adenosine (-H2O)", "C10H11N5O3", "249.0861892454"),
    c("Disaccharide (-H2O)", "C12H20O11", "340.1005614851"),
    c("Adenosine 5'-diphosphate (-H2O)", "C10H13N5O9P2", "409.0188500622"),
    c("Glucose-N-phosphate (-H2O)", "C6H11O8P", "242.0191538399"),
    c("Adenosine 5'-monophosphate (-H2O)", "C10H12N5O6P", "329.0525196538"),
    c("Glucuronic acid (-H2O)", "C6H8O6", "176.0320879894"),
    c("Cytidine 5'-diphosphate (-H2O)", "C9H13N3O10P2", "385.0076166739"),
    c("Monosaccharide (-H2O)", "C6H10O5", "162.0528234315"),
    c("Cytidine 5'-monophosphate (-H2O)", "C9H12N3O7P", "305.0412862655"),
    c("Trisaccharide (-H2O)", "C18H30O15", "486.1584702945"),
    c("Cytosine (-H)", "C4H4N3O",  "110.0354367661"))

## convert the character matrix to a data frame with numeric masses
transformations <- data.frame(name=transformations[,1], 
            formula=transformations[,2],
            mass=as.numeric(transformations[,3]))
@

\end{document}