\name{lproks}
\alias{lproks}
\docType{data}
\title{ Microbial genome projects at NCBI}
\description{
 Microbial genomes from Entrez genome project at NCBI.
}
\usage{data(lproks)}
\format{
  A genomes data frame with observations on the following 31 variables.
  \describe{
    \item{\code{pid}}{ genome project id}
    \item{\code{name}}{taxonomy name}
    \item{\code{status}}{sequencing status: Complete, Assembly, or
      In Progress genomes}
    \item{\code{released}}{release date, complete and WGS genomes only}
    \item{\code{taxid}}{taxonomy id}
    \item{\code{kingdom}}{kingdom}
    \item{\code{group}}{phylum or class}
    \item{\code{size}}{genome size (Mbp)}
    \item{\code{GC}}{percent GC content}
    \item{\code{chromosomes}}{number of chromosomes, complete genomes only}
    \item{\code{plasmids}}{number of plasmids, complete genomes only}
    \item{\code{modified}}{modified date, complete genomes only}
    \item{\code{genbank}}{comma-separated list of GenBank accession numbers}
    \item{\code{refseq}}{comma-separated list of RefSeq accession numbers}
    \item{\code{publication}}{comma-separated list of PubMed ids, complete genomes only}
    \item{\code{center}}{pipe-separated list of sequencing centers}
    \item{\code{contigs}}{number of genome contigs.  For complete
        genomes, contigs are the sum of chromosomes and plasmids }
    \item{\code{cds}}{ number of coding sequences, WGS only }
    \item{\code{url}}{sequencing center url, WGS and In Progress
        genomes only }
    \item{\code{gram}}{gram stain }
    \item{\code{shape}}{ shape }
    \item{\code{arrange}}{ arrangement}
    \item{\code{endospore}}{ endospores }
    \item{\code{motility}}{ motility}
    \item{\code{salinity}}{ salinity}
    \item{\code{oxygen}}{ oxygen requirement}
    \item{\code{habitat}}{ habitat}
    \item{\code{temp}}{ temperature preference }
    \item{\code{range}}{ temperature range}
    \item{\code{pathogen}}{ pathogenic in host }
    \item{\code{disease}}{ disease}

  }
}
\details{ This table is constructed using all three
tabs at \url{http://www.ncbi.nlm.nih.gov/genomes/lproks.cgi}.
Complete genomes and In Progress tabs are combined and then joined to
the Organism Info tab.

Nearly all of the Assembly genomes released after Sept 2009 are missing release
dates.  If needed, these dates are available from Entrez Genomes (see
the example in \code{\link{wgs}}).

The \code{update(genomes)} function downloads a recent copy of the
table from NCBI.  The number of new project IDs is reported as well
as the number of project IDs removed (which are typically Assembly
genomes that are now available as a Complete sequence).  Please note
that NCBI is currently changing how prokaryotic genomes are managed,
and some changes to these tables are possible (see
\url{http://www.ncbi.nlm.nih.gov/genomeprj} for details).  }

\source{
 downloaded from \url{http://www.ncbi.nlm.nih.gov/genomes/lproks.cgi}
}
% \references{}
\examples{
## load the data set and print it
data(lproks)
lproks
## single row (long format)
t(lproks[1,])
## class of the object
class(lproks)
## download stats, date and url stored as attributes
attributes(lproks)[c("stats", "date","url")] 
summary(lproks)
## many Assembly genomes are now missing release dates:
## cross-tabulate presence of a release date against status
table2(!is.na(lproks$released), lproks$status, dnn=list("Released Date?", "Status"))
## plots
plot(lproks)
plotby(lproks, log='y', las=1)
## download recent table from NCBI (not run)
\dontrun{update(lproks)} 
## Yersinia genomes: subset on the name column
yp <- subset(lproks, name \%like\% 'Yersinia*')
yp
summary(yp)
plotby(yp, labels=TRUE, cex=.5, lbty='n')
}
\keyword{datasets}