\name{GenerateLearningsets}
\alias{GenerateLearningsets}
\title{Repeated divisions into learning sets and test sets}
\description{
Due to very small sample sizes, a single division into learning set and test set
does not provide accurate information about classification performance.
Therefore, several different divisions should be used and the results
aggregated. The implemented methods are discussed in Braga-Neto and Dougherty
(2003) and Molinaro et al. (2005), whose terminology is adopted.

This function is usually the basis for all deeper analyses.
}
\usage{
GenerateLearningsets(n, y, method = c("LOOCV", "CV", "MCCV", "bootstrap"),
                     fold = NULL, niter = NULL, ntrain = NULL, strat = FALSE)
}
\arguments{
\item{n}{The total number of observations in the available data set. May be
\code{missing} if \code{y} is provided instead.}
\item{y}{A vector of class labels, either \code{numeric} or a \code{factor}.
\emph{Must} be given if \code{strat = TRUE} or if \code{n} is not specified.}
\item{method}{The scheme used to generate the divisions into learning sets and
test sets. Can be one of the following:
\describe{
\item{"LOOCV"}{Leave-One-Out Cross-Validation.}
\item{"CV"}{(Ordinary) Cross-Validation. Note that \code{fold} must be
specified as well.}
\item{"MCCV"}{Monte-Carlo Cross-Validation, i.e. random divisions into
learning sets with \code{ntrain} (see below) observations and test sets with
the remaining observations.}
\item{"bootstrap"}{Learning sets are generated by drawing \code{ntrain} times
with replacement from all observations. The observations not drawn form the
test set.}
}
}
\item{fold}{The number of CV groups. Used only when \code{method = "CV"}.}
\item{niter}{The number of iterations (see \code{details}).}
\item{ntrain}{The number of observations in the learning sets. Used only when
\code{method = "MCCV"} or \code{method = "bootstrap"}.}
\item{strat}{Logical. Should stratified sampling be performed, i.e. should the
proportion of observations from each class in the learning sets be the same as
in the whole data set? Does not apply to \code{method = "LOOCV"}.}
}
\details{
\itemize{
\item When \code{method = "CV"}, \code{niter} gives the number of times the
whole CV procedure is repeated. The output matrix then has
\code{fold} x \code{niter} rows. When \code{method = "MCCV"} or
\code{method = "bootstrap"}, \code{niter} is simply the number of generated
learning sets.
\item Note that \code{method = "CV", fold = n} is equivalent to
\code{method = "LOOCV"}.
}
}
\value{An object of class \code{\link{learningsets}}.}
\references{
Braga-Neto, U.M., Dougherty, E.R. (2003).
Is cross-validation valid for small-sample microarray classification?
\emph{Bioinformatics, 20(3), 374-380}.

Molinaro, A.M., Simon, R., Pfeiffer, R.M. (2005).
Prediction error estimation: a comparison of resampling methods.
\emph{Bioinformatics, 21(15), 3301-3307}.
}
\author{Martin Slawski \email{martin.slawski@campus.lmu.de}

Anne-Laure Boulesteix \url{http://www.slcmsr.net/boulesteix}}
\seealso{\code{\link{learningsets}}, \code{\link{GeneSelection}},
\code{\link{tune}}, \code{\link{classification}}}
\examples{
# LOOCV
loo <- GenerateLearningsets(n=40, method="LOOCV")
show(loo)
# five-fold CV
CV5 <- GenerateLearningsets(n=40, method="CV", fold=5)
show(CV5)
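# repeated five-fold CV: the whole CV procedure is run niter times,
# so that fold x niter = 10 learning sets are generated (see details)
CV5rep <- GenerateLearningsets(n=40, method="CV", fold=5, niter=2)
show(CV5rep)
# CV with fold equal to n corresponds to LOOCV (see details)
CVn <- GenerateLearningsets(n=40, method="CV", fold=40)
show(CVn)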
# MCCV
mccv <- GenerateLearningsets(n=40, method="MCCV", niter=3, ntrain=30)
show(mccv)
# bootstrap
boot <- GenerateLearningsets(n=40, method="bootstrap", niter=3)
show(boot)
# stratified five-fold CV
set.seed(113)
classlabels <- sample(1:3, size=50, replace=TRUE, prob=c(0.3, 0.5, 0.2))
CV5strat <- GenerateLearningsets(y=classlabels, method="CV", fold=5, strat=TRUE)
show(CV5strat)
}
\keyword{multivariate}