\name{fuzzy.ebam}
\alias{fuzzy.ebam}
\alias{fuzzy.stat}

\title{
EBAM and SAM for Fuzzy Genotype Calls
}
\description{
Computes the required statistics for an Empirical Bayes Analysis of Microarrays (EBAM; Efron et al., 2001) 
or a Significance Analysis of Microarrays (SAM; Tusher et al., 2001), respectively, based on the score statistic 
proposed by Louis et al. (2010) for fuzzy genotype calls or approximate Bayes Factors (Wakefield, 2007) determined
using this score statistic.

Should not be called directly, but via \code{ebam(..., method = fuzzy.ebam)} or \code{sam(..., method = fuzzy.stat)}, respectively. 
}
\usage{
fuzzy.ebam(data, cl, type = c("asymptotic", "permutation", "abf"), W = NULL, 
    logbase = exp(1), addOne = TRUE, df.ratio = NULL, n.interval = NULL, 
    df.dens = 5, knots.mode = TRUE, type.nclass = c("FD", "wand", "scott"), 
    fast = FALSE, B = 100, B.more = 0.1, B.max = 30000, n.subset = 10, rand = NA)
    
fuzzy.stat(data, cl, type = c("asymptotic", "permutation", "abf"), W = NULL, 
    logbase = exp(1), addOne = TRUE, B = 100, B.more = 0.1, B.max = 30000, 
    n.subset = 10, rand = NA)
}

\arguments{
  \item{data}{a matrix containing fuzzy genotype calls. Such a matrix can, e.g., be generated by the function 
     \code{\link[scrime]{getMatFuzzy}} based on the confidences for the three possible genotypes computed 
     by preprocessing algorithms such as CRLMM.
}
  \item{cl}{a vector of zeros and ones specifying which of the columns of \code{data} contain the fuzzy genotype calls for the
     cases (\code{1}) and which contain those for the controls (\code{0}). Thus, the length of \code{cl} must be equal to the number of columns of \code{data}.
}
  \item{type}{a character string specifying how the analysis should be performed. If \code{"asymptotic"}, the trend statistic of Louis et al. (2010)
     is used directly, and EBAM or SAM are performed assuming that under the null hypothesis this test statistic follows an asymptotic
     standard normal distribution. If \code{"permutation"}, a permutation procedure is employed to estimate the null distribution of this
     test statistic. If \code{"abf"}, Approximate Bayes Factors (ABF) proposed by Wakefield (2007) are determined from the trend statistic, 
     and EBAM or SAM are performed on these ABFs or transformations of these ABFs (see in particular \code{logbase} and \code{addOne}). In
     the latter case, again, a permutation procedure is used in EBAM and SAM to, e.g., compute posterior probabilities of association.  
}
  \item{W}{the prior variance. Must be either a positive value or a vector of length \code{nrow(data)} consisting of
     positive values. Ignored if \code{type = "asymptotic"} or \code{type = "permutation"}. For details, see 
     \code{\link[scrime]{abf}}.
}
  \item{logbase}{a numeric value larger than 1. If \code{type = "abf"}, then the ABFs are not directly used in the analysis, but a
     log-transformation (with base \code{logbase}) of the ABFs. If the ABFs should not be transformed, \code{logbase} can be set to \code{NA}.
     Ignored if \code{type = "asymptotic"} or \code{type = "permutation"}. 
}
  \item{addOne}{should 1 be added to the ABF before it is log-transformed? If \code{TRUE}, \code{log(ABF + 1, base=logbase)} is used as
     test score in EBAM or SAM. If \code{FALSE}, \code{log(ABF, base = logbase)} is considered. Only taken into account when 
     \code{type = "abf"} and \code{logbase} is not \code{NA}.
}
  \item{df.ratio}{integer specifying the degrees of freedom of the natural cubic
     spline used in the logistic regression with repeated observations for estimating the ratio \eqn{f_0/f}{f0/f}. Ignored
     if \code{type = "asymptotic"}. If not specified, \code{df.ratio} is set to \code{3} if \code{type = "abf"},
     and to \code{5} if \code{type = "permutation"}.
}
  \item{n.interval}{the number of intervals used in the logistic regression with
     repeated observations (if \code{type = "permutation"} or \code{type = "abf"}), or in the Poisson regression used to estimate
     the density of the observed \eqn{z}-values (if \code{type = "asymptotic"}).
     If \code{NULL}, \code{n.interval} is estimated by the method specified by \code{type.nclass}, where at least 139 intervals
     are considered if \code{type = "permutation"} or \code{type = "abf"}.
}
  \item{df.dens}{integer specifying the degrees of freedom of the natural cubic
     spline used in the Poisson regression to estimate the density of the observed
     \eqn{z}-values in an application of \code{\link{ebam}} with \code{type = "asymptotic"}. Otherwise, ignored. 
}
  \item{knots.mode}{logical specifying whether the \code{df.dens} - 1 knots are centered around the
     mode and not the median of the density when fitting the Poisson regression model to estimate
     the density of the observed \eqn{z}-values in an application of \code{\link{ebam}} with \code{type = "asymptotic"}
     (for details on this density estimation, see \code{\link{denspr}}). Ignored if \code{type = "permutation"}
     or \code{type = "abf"}.
}
  \item{type.nclass}{character string specifying the procedure used to compute the
     number of cells of the histogram. Ignored if \code{type = "permutation"}, \code{type = "abf"}, or 
     \code{n.interval} is specified. Can be either \code{"FD"} (default), \code{"wand"}, or \code{"scott"}. 
     For details, see \code{\link{denspr}}.
}
  \item{fast}{if \code{FALSE} the exact number of permuted test scores that are
     more extreme than a particular observed test score is computed for each of
     the variables/SNPs. If \code{TRUE}, a crude estimate of this number is used.
}
  \item{B}{the number of permutations used in the estimation of the null distribution,
     and hence, in the computation of the expected \eqn{z}-values. Ignored if \code{type = "asymptotic"}.
}
  \item{B.more}{a numeric value. If the number of all possible permutations is smaller
     than or equal to (1+\code{B.more})*\code{B}, full permutation will be done. 
     Otherwise, \code{B} permutations are used.
}
  \item{B.max}{a numeric value. If the number of all possible permutations is smaller
     than or equal to \code{B.max}, \code{B} randomly selected permutations will be used
     in the computation of the null distribution. Otherwise, \code{B} random draws
     of the group labels are used.
}
  \item{n.subset}{a numeric value indicating into how many subsets the \code{B} 
     permutations are divided when computing the permuted \eqn{z}-values. Please note
     that the meaning of \code{n.subset} differs between the SAM and the EBAM functions.
}
  \item{rand}{numeric value. If specified, i.e. not \code{NA}, the random number generator
     will be set into a reproducible state.
}
}

\value{
A list containing statistics required by \code{ebam} or \code{sam}.}

\references{
Efron, B., Tibshirani, R., Storey, J.D., and Tusher, V. (2001). 
   Empirical Bayes Analysis of a Microarray Experiment, \emph{JASA}, 
   96, 1151-1160.
   
Louis, T.A., Carvalho, B.S., Fallin, M.D., Irizarry, R.A., Li, Q., and Ruczinski, I. (2010). 
   Association Tests that Accommodate Genotyping Errors. In Bernardo, J.M., Bayarri, M.J., 
   Berger, J.O., Dawid, A.P., Heckerman, D., Smith, A.F.M., and West, M. (eds.),
   \emph{Bayesian Statistics 9}, 393-420. Oxford University Press, Oxford, UK. With Discussion. 

Tusher, V.G., Tibshirani, R., and Chu, G. (2001). Significance Analysis of Microarrays
   Applied to the Ionizing Radiation Response. \emph{PNAS}, 98, 5116-5121.
   
Wakefield, J. (2007). A Bayesian Measure of Probability of False Discovery in Genetic 
   Epidemiology Studies. \emph{AJHG}, 81, 208-227.
}

\author{
Holger Schwender, \email{holger.schw@gmx.de}
}

\seealso{
\code{\link{ebam}}, \code{\link{sam}}, \code{\link{EBAM-class}}, \code{\link{SAM-class}}
}

\keyword{htest}