\name{twostepsva.build}
\alias{twostepsva.build}
\title{Build surrogate variables from gene expression and model data}
\description{
  Construct a specified number of surrogate variables from a gene expression data set based on the two-step algorithm of Leek and Storey (2007).
}
\usage{
twostepsva.build(dat, mod, n.sv)
}
\arguments{
  \item{dat}{An m genes by n arrays matrix of expression data.}
  \item{mod}{An n by k model matrix corresponding to the primary model fit (see \code{\link[stats]{model.matrix}}).}
  \item{n.sv}{The number of surrogate variables to construct.}
}
\details{
  The SVA estimation algorithm is described in Leek and Storey (2007). The basic idea is to estimate surrogate variables based on the subset of rows affected by unmodeled dependence.
}
\value{
  A list containing:
  \item{sv}{An n by n.sv matrix where each column is a distinct surrogate variable (the main quantity of interest).}
  \item{pprob.gam}{A vector indicating whether each row was used in the building of the surrogate variables: 1 = row used, 0 = row not used.}
  \item{pprob.b}{\code{NULL} for two-step SVA; see \code{\link{irwsva.build}} for more info.}
  \item{n.sv}{The number of surrogate variables estimated.}
}
\references{
  Leek JT and Storey JD. (2007) Capturing heterogeneity in gene expression studies by surrogate variable analysis. PLoS Genetics, 3: e161. \url{http://www.biostat.jhsph.edu/~jleek/publications.html}

  sva Vignette \url{http://www.biostat.jhsph.edu/~jleek/sva/}
}
\author{Jeffrey T.
Leek \email{jleek@jhsph.edu}, John Storey \email{jstorey@princeton.edu}}
\seealso{\code{\link{sva}}, \code{\link{num.sv}}, \code{\link{irwsva.build}}, \code{\link{ComBat}}}
\examples{
\dontrun{
## Load data
library(bladderbatch)
data(bladderdata)

## Obtain phenotypic data
pheno = pData(bladderEset)
edata = exprs(bladderEset)
batch = pheno$batch
mod = model.matrix(~as.factor(cancer), data=pheno)
mod0 = model.matrix(~1, data=pheno)

## Construct the surrogate variables
svaobj <- twostepsva.build(edata,mod,n.sv=1)

## Include them in a downstream analysis
mod.sv <- cbind(mod,svaobj$sv)
mod0.sv <- cbind(mod0,svaobj$sv)
adjusted.pvals <- f.pvalue(edata,mod.sv,mod0.sv)
}
}
\keyword{misc}