\name{createSignatures4TB}
\alias{createSignatures4TB}

\title{Creates a set of transcriptional signatures from a microarray dataset}
\description{This function is a wrapper to create sets of transcriptional signatures (as in the TranscriptomeBrowser Project, TBrowser, \url{http://tagc.univ-mrs.fr/tbrowser}). This function creates a "cdt" file containing a set of expression matrices (transcriptional signatures) separated by blank lines. Please note that it requires both MCL and Cluster 3.0 (see the 'Warnings' section). It accepts either a matrix or a file name as input.
}


% Usage is wrapped below 90 columns to avoid the R CMD check "Rd line widths" NOTE.
\usage{
createSignatures4TB(data = NULL, filename = NULL, path = ".", name = NULL,
                    normalization = c("rank", "gaussian", "quantiles", "none"),
                    distance.method = c("spearman", "pearson", "euclidean",
                                        "spm", "spgm"),
                    silent = FALSE, verbose = TRUE, k = 150, random = 3,
                    memory.used = 1024, fdr = 10, inflation = 2.0,
                    median.center = FALSE, set.seed = 123, returnRank = FALSE)
}

\arguments{
  \item{data}{a \code{matrix}, \code{data.frame} or \code{ExpressionSet} object.}
  \item{filename}{a character string representing the file name to load.}
  \item{path}{a character string representing the data directory.}
  \item{name}{a prefix for the name of the created files.}
  \item{normalization}{indicates whether data should be normalized prior to analysis (see details).}
  \item{distance.method}{a method to compute the distance to the k-th nearest neighbor. One of "pearson" (Pearson's correlation coefficient-based distance), "spearman" (Spearman's rho-based distance), "euclidean", "spm" or "spgm". Note that the "spm" distance corresponds to the arithmetic mean of the pearson- and spearman-based distances: ("pearson"+"spearman")/2, whereas "spgm" computes their geometric mean: sqrt("pearson"*"spearman").}
  \item{silent}{if set to TRUE, the progression of distance matrix calculation is not displayed.}
  \item{verbose}{if set to TRUE the function runs verbosely.}
  \item{k}{the neighborhood size.}
  \item{random}{the number of simulated distributions S to compute. By default \code{random = 3}.}
  \item{memory.used}{size of the memory used to store part of the distance matrix. The subsequent sub-matrix is used to compute simulated distances to the k-th nearest neighbor (see the details section of the \code{\link{DBFMCL}} function).}
  \item{fdr}{an integer value corresponding to the false discovery rate (range: 0 to 100).}
  \item{inflation}{the main control of MCL. Inflation affects cluster granularity. It is usually chosen somewhere in the range \code{[1.2-5.0]}. \code{inflation = 5.0} will tend to result in fine-grained clusterings whereas \code{inflation = 1.2} will tend to result in very coarse grained clusterings. By default, \code{inflation = 2.0}. Default setting gives very good results for microarray data.}
  \item{median.center}{if set to TRUE, median-centering is applied to the rows of the matrix.}
  \item{set.seed}{the seed used to initialize the random number generator.}
  \item{returnRank}{This argument modifies the output. Given a set of elements conserved after the filtering step of the DBFMCL algorithm, if \code{returnRank = TRUE} their expression values are replaced by their corresponding ranks in the input matrix.}
}

\details{
The Markov Cluster Algorithm was written by S. Van Dongen (see reference section). Cluster was originally written by Michael Eisen (\url{http://rana.lbl.gov/EisenSoftware.htm}). The command line version of Cluster version 3.0 was created by Michiel de Hoon, together with Seiya Imoto and Satoru Miyano. 
}

\section{Warnings}{
With the current implementation, this function works only on UNIX-like platforms.

Cluster 3.0 should be installed in its command-line only version:

Please see \url{http://bonsai.ims.u-tokyo.ac.jp/~mdehoon/software/cluster/software.htm} for further information.

\code{wget http://bonsai.ims.u-tokyo.ac.jp/~mdehoon/software/cluster/cluster-1.36.tar.gz}

\code{tar xvfz cluster-1.36.tar.gz}

\code{cd cluster-1.36/}

\code{./configure --without-x}

\code{make}

\code{sudo make install}

\code{# You should get cluster in your path}

\code{cluster -v}


MCL should be installed:

\code{# Download the latest version of mcl (the script has been tested successfully with the 06-058 version).}

\code{wget http://micans.org/mcl/src/mcl-latest.tar.gz}

\code{# Uncompress and install mcl}

\code{tar xvfz mcl-latest.tar.gz}

\code{cd mcl-xx-xxx}

\code{./configure}

\code{make}

\code{sudo make install}

\code{# You should get mcl in your path}

\code{mcl -h}


}

\references{
Lopez F., Textoris J., Bergon A., Didier G., Remy E., Granjeaud S., Imbert J., Nguyen C. and Puthier D. TranscriptomeBrowser: a powerful and
flexible toolbox to explore productively the transcriptional landscape of the Gene Expression Omnibus database. PLoS ONE, 2008;3(12):e4001.

Van Dongen S. (2000) A cluster algorithm for graphs. Technical Report INS-R0010, National Research Institute for Mathematics and Computer Science in the Netherlands, Amsterdam. ISSN 1386-3681.

Open source clustering software. De Hoon MJ, Imoto S, Nolan J, Miyano S. Bioinformatics. 2004 Jun 12;20(9):1453-4.

Cluster analysis and display of genome-wide expression patterns. Eisen MB, Spellman PT, Brown PO, Botstein D. Proc Natl Acad Sci U S A. 1998 Dec 8;95(25):14863-8.
}

\author{Bergon A., Lopez F., Textoris J., Granjeaud S. and Puthier D.}
\seealso{\code{\link{DBFMCL}}, \code{\link{heatmapFromCDT}}, \code{\link{plotGeneExpProfiles}}, \code{\link{getSignatures}}, \code{\link{getExpressionMatrix}}}

\examples{
\dontrun{
## with an artificial dataset: a 4000 x 20 matrix of Gaussian noise in
## which three groups of 100 rows are shifted over a subset of columns

m <- matrix(rnorm(80000), ncol = 20)
m[1:100, 1:10] <- m[1:100, 1:10] + 4
m[101:200, 11:20] <- m[101:200, 11:20] + 3
m[201:300, 5:15] <- m[201:300, 5:15] - 2

res <- createSignatures4TB(data = m, name = "artificial",
                           distance.method = "pearson",
                           median.center = TRUE, k = 25)

## display all signatures, from the returned object and from the cdt file
plotGeneExpProfiles(res)
allsign <- heatmapFromCDT("artificial.dataMods.cdt")

## display the first signature only
plotGeneExpProfiles(res, signature=1)
heatmapFromCDT("artificial.dataMods.cdt", signature=1)

## with a real dataset
library(ALL)
data(ALL)
## the result is stored in 'resALL' so that base::exp is not masked
resALL <- createSignatures4TB(data = ALL, name = "ALLdataset",
                              distance.method = "pearson",
                              median.center = TRUE, k = 100)
plotGeneExpProfiles(resALL, signatures=1)
plotGeneExpProfiles(resALL)
allsign <- heatmapFromCDT("ALLdataset.dataMods.cdt")
si1 <- heatmapFromCDT("ALLdataset.dataMods.cdt", signature=1)
}
}

\keyword{manip}