\name{assemble.data}
\alias{assemble.data}
\title{Assemble the data to run the integrated analysis}
\description{Assembles the copy number and expression data and annotation.}

\usage{
assemble.data(dep.data = acgh.data, indep.data = expr.data, ann.dep = colnames(acgh.data)[1:4], ann.indep = colnames(expr.data)[1:4], dep.id = "ID", dep.chr = "CHROMOSOME", dep.pos = "STARTPOS", dep.symb = FALSE, indep.id = "ID", indep.chr = "CHROMOSOME", indep.pos = "STARTPOS", indep.symb = FALSE, overwrite = FALSE, run.name = NULL)
}

\arguments{
   \item{dep.data}{\code{\link{data.frame}}. The dependent data, along with annotations.
   Each row should correspond to one feature. The following columns are expected to exist,
   and their column names should be supplied through the corresponding arguments:
    \code{dep.id}: A unique identifier.
    \code{dep.chr}: The number of the chromosome (chrX=23 and chrY=24).
    \code{dep.pos}: The base pair position, relative to the chromosome.
    \code{dep.symb}: Gene symbol (optional).
    The data will be sorted on \code{Abs.start}, generated by chr*10e9+basepair.}
  
  \item{indep.data}{\code{\link{data.frame}}. The independent data, along with annotations.
  Each row should correspond to one feature. The following columns are expected to exist,
  and their column names should be supplied through the corresponding arguments:
    \code{indep.id}: A unique identifier.
    \code{indep.chr}: The number of the chromosome (chrX=23 and chrY=24).
    \code{indep.pos}: The base pair position, relative to the chromosome.
    \code{indep.symb}: Gene symbol (optional).
    The data will be sorted on \code{Abs.start}, generated by chr*10e9+basepair.}

  \item{ann.dep}{\code{\link{vector}} with either the names of the columns or the 
  column numbers in the dependent data that contain the annotation.}

  \item{ann.indep}{\code{\link{vector}} with either the names of the columns or the 
  column numbers in the independent data that contain the annotation.}

  \item{dep.id}{\code{\link{vector}} with the column name in the dependent 
  data that contains the ID. Will be used in the \code{\link{sim.plot.zscore.heatmap}} function. 
  Empty IDs will be substituted by NA.}

  \item{dep.chr}{\code{\link{vector}} with column name in the dependent 
  data that contains the chromosome numbers.}

  \item{dep.pos}{\code{\link{vector}} with the column name in the dependent data 
  that contains the position on the chromosome in bases.}

  \item{dep.symb}{Optional, either \code{FALSE} or a \code{\link{vector}} with the column name
  in the dependent data that contains the Symbols. Will be used in \code{\link{sim.plot.zscore.heatmap}}
  as label.}

  \item{indep.id}{\code{\link{vector}} with the column name in the independent 
  data that contains the ID. Will be used in the \code{\link{sim.plot.zscore.heatmap}} function. 
  Empty IDs will be substituted by NA.}

 \item{indep.chr}{\code{\link{vector}} with the column name in the independent data 
  that contains the chromosome numbers.}

  \item{indep.pos}{\code{\link{vector}} with the column name in the independent data 
  that contains the position on the chromosome in bases.}

  \item{indep.symb}{Optional, either \code{FALSE} or a \code{\link{vector}} with the column name
  in the independent data that contains the Symbols. Will be used in \code{\link{sim.plot.zscore.heatmap}}
  as label.}

  \item{overwrite}{\code{logical}, indicates whether the results can be overwritten
  when a \code{run.name} is already present.}

  \item{run.name}{Name of the analysis. The results will be 
   stored in a folder with this name in the current working directory 
   (use \code{getwd()} to print the current working directory). 
   If \code{run.name = NULL}, the default folder \code{"analysis_results"} will be generated.}
}

\value{
 No values are returned. Instead, the datasets and annotation columns are stored in
  separate files in the \code{data} folder in the directory specified in \code{run.name}.
  If the \code{assemble.data} function has run successfully, the \code{\link{integrated.analysis}}
  function can be performed.
}
\author{Marten Boetzer, Melle Sieswerda, Renee X. de Menezes  \email{R.X.Menezes@lumc.nl}}

\seealso{
\code{\link{SIM}}, \code{\link{integrated.analysis}}, 
\code{\link{sim.plot.zscore.heatmap}}, \code{\link{sim.plot.pvals.on.region}}, 
\code{\link{sim.plot.pvals.on.genome}}, \code{\link{tabulate.pvals}}, \code{\link{tabulate.top.dep.features}}, 
\code{\link{tabulate.top.indep.features}}, \code{\link{impute.nas.by.surrounding}}, 
\code{\link{sim.update.chrom.table}}
}

\examples{
#load the datasets and the samples to run the integrated analysis
data(expr.data)
data(acgh.data)
data(samples) 
         
#read the data
assemble.data(dep.data = acgh.data, indep.data = expr.data, ann.dep = colnames(acgh.data)[1:4], ann.indep = colnames(expr.data)[1:4], dep.id="ID", dep.chr = "CHROMOSOME",dep.pos = "STARTPOS",dep.symb="Symbol", indep.id="ID",indep.chr = "CHROMOSOME", indep.pos = "STARTPOS", indep.symb="Symbol", overwrite = TRUE,run.name ="chr8")
}
\keyword{misc}