\name{generateData}
\alias{generateData}
\title{Simulator for gene expression data}
\description{
  A simulator for gene expression data. The simulated values are normally
  distributed with zero mean. The covariances are given by a configurable
  block-diagonal matrix. By default, half of the samples contain differential
  gene expression values (see parameter \code{diffsamples}).
}
\usage{
generateData(samples=50, genes=10000, diffgenes=200, blocksize=50, cov1=0.2, cov2=0, diff=0.6, diffsamples)
}
\arguments{
  \item{samples}{ number of samples }
  \item{genes}{ number of gene expression values per sample }
  \item{diffgenes}{ number of differential genes for class 1 }
  \item{blocksize}{ size of each block in the block-diagonal correlation matrix }
  \item{cov1}{ covariance within the blocks in the correlation matrix }
  \item{cov2}{ covariance between the blocks in the correlation matrix }
  \item{diff}{ difference between the random gene expression values and the differential gene expression values }
  \item{diffsamples}{ number of samples containing differential gene expression values compared to the rest (if missing, this parameter is set to half of the total number of samples) }
}
\details{
  The simulator generates two labeled classes:\cr
  label 1: samples with differentially expressed genes. \cr
  label -1: samples without differentially expressed genes.
}
\value{
  \code{generateData} returns a list containing:
  \item{data}{a (samples x features)-matrix with the simulated gene expression values}
  \item{labels}{a vector with labels (1,-1) for the two classes}
}
\author{ Christoph Bartenhagen }
\examples{
## generate a dataset with 20 samples and 1000 gene expression values
d = generateData(samples=20, genes=1000, diffgenes=100, blocksize=10)
data = d[[1]]
labels = d[[2]]
}