\name{emfit}
\alias{emfit}
\alias{emfit,ExpressionSet,character,ebarraysPatterns-method}
\alias{emfit,ExpressionSet,ebarraysFamily,ebarraysPatterns-method}
\alias{emfit,matrix,character,ebarraysPatterns-method}
\alias{emfit,matrix,ebarraysFamily,ebarraysPatterns-method}
\alias{show,ebarraysEMfit-method}
\alias{ebarraysEMfit-class}
\title{ Implements EM algorithm for gene expression mixture model }
\description{
  Implements the EM algorithm for gene expression mixture model
}
\synopsis{
emfit(data, family, hypotheses, ...)
}
\usage{
emfit(data, family, hypotheses, cluster, type=2, criterion="BIC",
      cluster.init = NULL, num.iter = 20,
      verbose = getOption("verbose"),
      optim.control = list(), ...)
}
\arguments{
  \item{data}{ a matrix or an object of class ``ExpressionSet''}
  \item{family}{ an object of class ``ebarraysFamily'' or a character string which can be coerced to one. Currently, only the character strings "GG", "LNN", and "LNNMV" are valid. For LNNMV, a \code{groupid} is required. See below. Other families can be supplied by constructing them explicitly. }
  \item{hypotheses}{ an object of class ``ebarraysPatterns'' representing the hypotheses of interest. Such patterns can be generated by the function \code{\link{ebPatterns}} }
  \item{cluster}{if \code{type}=1, \code{cluster} is a vector specifying the fixed cluster membership for each gene; if \code{type}=2, \code{cluster} specifies the number of clusters to be fitted}
  \item{type}{if \code{type}=1, the cluster membership is fixed as input \code{cluster}; if \code{type}=2, fit the data with a fixed number of clusters}
  \item{criterion}{only used when \code{type}=2 and \code{cluster} contains more than one integer. All numbers of clusters provided in \code{cluster} will be fitted and the one that minimizes \code{criterion} will be returned. Possible values now are "BIC", "AIC" and "HQ"}
  \item{cluster.init}{only used when \code{type}=2.
Specifies the initial cluster membership.}
  \item{num.iter}{ number of EM iterations}
  \item{verbose}{ logical or numeric (0,1,2) indicating desired level of information printed for the user}
  \item{optim.control}{ list passed unchanged to \code{\link{optim}} for finer control}
  \item{\dots}{\code{groupid}: an integer vector indicating which group each sample belongs to, required in the ``LNNMV'' model. It does not depend on ``hypotheses''.}
}
\value{
  an object of class ``ebarraysEMfit'' that can be summarized by \code{show()} and used to generate posterior probabilities using \code{\link{postprob}}
}
\author{Ming Yuan, Ping Wang, Deepayan Sarkar, Michael Newton, and Christina Kendziorski}
\references{
  Newton, M.A., Kendziorski, C.M., Richmond, C.S., Blattner, F.R. (2001). On differential variability of expression ratios: Improving statistical inference about gene expression changes from microarray data. Journal of Computational Biology 8:37-52.

  Kendziorski, C.M., Newton, M.A., Lan, H., Gould, M.N. (2003). On parametric empirical Bayes methods for comparing multiple groups using replicated gene expression profiles. Statistics in Medicine 22:3899-3914.

  Newton, M.A. and Kendziorski, C.M. Parametric Empirical Bayes Methods for Microarrays in The analysis of gene expression data: methods and software. Eds. G. Parmigiani, E.S. Garrett, R. Irizarry and S.L. Zeger, New York: Springer Verlag, 2003.

  Newton, M.A., Noueiry, A., Sarkar, D., and Ahlquist, P. (2004). Detecting differential gene expression with a semiparametric hierarchical mixture model. Biostatistics 5: 155-176.

  Yuan, M. and Kendziorski, C. (2006). A unified approach for simultaneous gene clustering and differential expression identification. Biometrics 62(4): 1089-1098.
}
\seealso{
  \code{\link{ebPatterns}}, \code{\link{ebarraysFamily-class}}
}
\examples{
data(sample.ExpressionSet) ## from Biobase
eset <- exprs(sample.ExpressionSet)
patterns <- ebPatterns(c("1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1",
                         "1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2"))
gg.fit <- emfit(data = eset, family = "GG", hypotheses = patterns, verbose = TRUE)
show(gg.fit)
}
\keyword{models}