\name{T.fit}
\alias{T.fit}

\title{ Makes a stepwise regression fit for time series gene expression experiments }
\description{
     \code{T.fit} selects the best regression model for each gene using stepwise regression.
}
\usage{
T.fit(data, design = data$dis, step.method = "backward", 
      min.obs = data$min.obs, alfa = data$Q, nvar.correction = FALSE)
}

\arguments{
  \item{data}{ can either be a \code{\link{p.vector}} object or a matrix containing expression data with the same requirements as for
          the \code{\link{p.vector}} function}
  \item{design}{ design matrix for the regression fit such as that generated by the \code{\link{make.design.matrix}} function. If data is
          a \code{\link{p.vector}} object, the same design matrix is used by default }
  \item{step.method}{argument to be passed to the step function. Can be either \code{"backward"}, \code{"forward"}, \code{"two.ways.backward"} or \code{"two.ways.forward"} }
  \item{min.obs}{ genes with fewer than this number of true numerical values will be excluded from the analysis }
  \item{alfa}{significance level used for variable selection in the stepwise regression }
  \item{nvar.correction}{argument for correcting T.fit significance level. See details}
}
\details{
     In the maSigPro approach \code{\link{p.vector}} and \code{\link{T.fit}} are subsequent steps, meaning that significant genes are
     first selected on the basis of a general model and then the significant variables for each gene are found by step-wise regression. 

     The step regression can be \code{"backward"} or \code{"forward"}, indicating whether the step procedure starts from the 
     model with all or with no variables. With the \code{"two.ways.backward"} or \code{"two.ways.forward"} options, variables are allowed both to enter and to leave the model.
     At each step the p-value of each variable is computed, and variables enter or leave the model when this p-value is
     lower or higher, respectively, than the given threshold \code{alfa}. When \code{nvar.correction} is \code{TRUE}, the given significance level is corrected by the number of variables in the model.
}
\value{
  \item{sol}{matrix for summary results of the stepwise regression. For each selected gene the following values are given:
    \itemize{
      \item p-value of the regression ANOVA
      \item R-squared of the model
      \item p-value of the regression coefficients of the selected variables
    }
  }
  \item{sig.profiles}{expression values for the genes contained in \code{sol}}
  \item{coefficients}{matrix containing regression coefficients for the adjusted models}
  \item{groups.coeffs}{matrix containing the coefficients of the implicit models of each experimental group}
  \item{variables}{variables in the complete regression model}
  \item{G}{total number of input genes}
  \item{g}{number of genes taken in the regression fit}
  \item{dat}{input analysis data matrix}
  \item{dis}{regression design matrix}
  \item{step.method}{step method supplied for the stepwise regression}
  \item{edesign}{matrix of experimental design}
  \item{influ.info}{data frame of genes containing influential data}
}
\references{Conesa, A., Nueda, M.J., Ferrer, A., Talon, M. 2006.
maSigPro: a Method to Identify Significantly Differential Expression Profiles in Time-Course Microarray Experiments. 
Bioinformatics 22, 1096-1102.
}
\author{Ana Conesa, aconesa@ivia.es; Maria Jose Nueda, mj.nueda@ua.es}

\seealso{\code{\link{p.vector}}, \code{\link{step}}}
\examples{

#### GENERATE TIME COURSE DATA
## generate n random gene expression profiles of a data set with 
## one control plus 3 treatments, 3 time points and r replicates per time point.

tc.GENE <- function(n, r,
             var11 = 0.01, var12 = 0.01,var13 = 0.01,
             var21 = 0.01, var22 = 0.01, var23 =0.01,
             var31 = 0.01, var32 = 0.01, var33 = 0.01,
             var41 = 0.01, var42 = 0.01, var43 = 0.01,
             a1 = 0, a2 = 0, a3 = 0, a4 = 0,
             b1 = 0, b2 = 0, b3 = 0, b4 = 0,
             c1 = 0, c2 = 0, c3 = 0, c4 = 0)
{
  ## Simulate n expression profiles, one row per gene.
  ## Each row is the concatenation of four groups (Ctl, Tr1, Tr2, Tr3); within a
  ## group there are three consecutive runs of r normal draws, so a row has
  ## 12 * r values.
  ##   n        number of genes (rows) to generate; n = 0 returns NULL
  ##   r        number of draws per run (replicates)
  ##   aG,bG,cG means of the 1st, 2nd and 3rd run of group G (G = 1..4)
  ##   varGk    spread of the k-th run of group G. NOTE: the value is handed to
  ##            rnorm() as its third argument, i.e. as a standard deviation.
  ## Returns a matrix with n rows, each row named "gene".

  profiles <- vector("list", n)
  ## seq_len(n) does nothing for n = 0, whereas 1:n would loop over c(1, 0)
  for (i in seq_len(n)) {
    Ctl <- c(rnorm(r, a1, var11), rnorm(r, b1, var12), rnorm(r, c1, var13))  # Ctl group
    Tr1 <- c(rnorm(r, a2, var21), rnorm(r, b2, var22), rnorm(r, c2, var23))  # Tr1 group
    Tr2 <- c(rnorm(r, a3, var31), rnorm(r, b3, var32), rnorm(r, c3, var33))  # Tr2 group
    Tr3 <- c(rnorm(r, a4, var41), rnorm(r, b4, var42), rnorm(r, c4, var43))  # Tr3 group
    profiles[[i]] <- c(Ctl, Tr1, Tr2, Tr3)
  }
  ## Bind all rows in one call instead of growing the matrix gene by gene;
  ## the list names become the row names ("gene"), as with the per-gene rbind().
  names(profiles) <- rep("gene", n)
  do.call(rbind, profiles)
}

## Simulate four classes of genes (all with r = 3 replicates)
## 270 genes using the default (all-zero) means: flat profiles
flat <- tc.GENE(r = 3, n = 270)
## 10 genes whose Tr1 profile departs from Ctl
twodiff <- tc.GENE(r = 3, n = 10, b2 = 0.5, c2 = 1.3)
## 10 genes whose Tr2 and Tr3 profiles depart from Ctl
threediff <- tc.GENE(r = 3, n = 10,
                     b3 = 0.8, c3 = -1,
                     a4 = -0.1, b4 = -0.8, c4 = -1.2)
## 10 genes whose Tr2 profile departs from Ctl, with a larger spread at the
## last two time points of Tr2
vardiff <- tc.GENE(r = 3, n = 10,
                   a3 = 0.7, b3 = 1, c3 = 1.2,
                   var32 = 0.03, var33 = 0.03)
## Stack the classes into one 300 x 36 data set and label rows and columns
tc.DATA <- rbind(flat, twodiff, threediff, vardiff)
dimnames(tc.DATA) <- list(paste0("feature", 1:300), paste0("Array", 1:36))
## Blank out 300 randomly chosen cells to introduce missing values
is.na(tc.DATA) <- sample(seq_len(300 * 36), 300)

#### CREATE EXPERIMENTAL DESIGN
## 36 arrays = 4 groups x 3 time points x 3 replicates, in the same column
## order as the simulated data
Time <- rep(rep(1:3, each = 3), times = 4)
Replicates <- rep(1:12, each = 3)
## 0/1 indicator columns: which arrays belong to each experimental group
Control <- rep(c(1, 0), times = c(9, 27))
Treat1 <- rep(c(0, 1, 0), times = c(9, 9, 18))
Treat2 <- rep(c(0, 1, 0), times = c(18, 9, 9))
Treat3 <- rep(c(0, 1), times = c(27, 9))
edesign <- cbind(Time, Replicates, Control, Treat1, Treat2, Treat3)
rownames(edesign) <- paste0("Array", 1:36)

## Case 1: T.fit on a p.vector object
## (the design matrix stored in the p.vector object is used by default)
tc.p <- p.vector(tc.DATA, Q = 0.01, design = make.design.matrix(edesign))
tc.tstep <- T.fit(tc.p, alfa = 0.05)

## Case 2: T.fit on a plain data matrix with an explicit design matrix
## (rows 271:300 are the 30 simulated non-flat genes)
dise <- make.design.matrix(edesign)
tc.tstep <- T.fit(tc.DATA[271:300, ],
                  design = dise$dis,
                  step.method = "two.ways.backward",
                  alfa = 0.05,
                  min.obs = 10)
## p-values of the significant regression coefficients
## of the optimized models
tc.tstep$sol
}

\keyword{ regression }
\keyword{ models }