\name{sra}
\alias{sra}
\docType{data}
\title{ Microbial SRA samples at the ENA }
\description{ Next-generation sequencing projects from microbes in the Sequence Read Archive (SRA)
at the European Nucleotide Archive (ENA). 
}
\usage{data(sra)}
\format{
  A genomes data frame with 18279 observations on the following 13 variables.
  \describe{
    \item{\code{taxid}}{ taxonomy id}
    \item{\code{name}}{ scientific name (if missing, then title) }
    \item{\code{alias}}{ name qualifier from alias attribute}
    \item{\code{sample}}{ SRA sample }
    \item{\code{submission}}{SRA submission}
    \item{\code{study}}{ SRA study}
    \item{\code{experiment}}{SRA experiment}
    \item{\code{center}}{sequencing center}
    \item{\code{bases}}{ number of bases}
    \item{\code{reads}}{ number of reads}
    \item{\code{submitted}}{ submission date}
    \item{\code{model}}{ model of sequencer }
    \item{\code{type}}{ study type }
  }
}
\details{ Downloaded from ENA on Oct 27, 2011.  Created by joining \code{enaSRA("Bacteria")} and 
 \code{enaSRA("Archaea")} and adding submission dates using \code{\link{enaSubmission}}, the sequencer model 
  using \code{\link{enaExperiment}} and the study type using \code{\link{enaStudy}}.  Microbes represent approximately 6\% of the total bases in the SRA.
}
\source{ SRA sample portal at ENA }
%\references{ }
\examples{
data(sra)

# Summary tables by species, sequencing center, sequencer model and study
table2(species(sra$name))
table2(sra$center)
table2(sra$model)
table2(sra$study)

# Average read length by model: mean of per-sample bases/reads, rounded to 1 decimal
data.frame(read=round(tapply(sra$bases/sra$reads, list(sra$model), mean, na.rm=TRUE), 1))

# Image plot of total bases by model and submission year.
# The column is named 'submitted'; spell it out in full rather than relying
# on partial matching of '$' (as in 'sra$submit').
y <- tapply(sra$bases, list(sra$model, year(sra$submitted)), sum, na.rm=TRUE)
image2(y / 1e9, mar=c(1, 11, 4, 1), log=TRUE, round=1)
title("Total microbial bases submitted per year (billions)", cex.main=1, line=2)


}
\keyword{datasets}