## ----setup, include = FALSE---------------------------------------------------
# Set default knitr chunk options for the chunks that follow:
#   collapse - show source code and its output in a single block
#   comment  - prefix each line of printed output with "#>"
#   dpi      - resolution used for bitmap figures
#   crop     - NOTE(review): NULL presumably turns off a figure-cropping
#              hook set by the vignette style — confirm
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    crop = NULL,
    dpi = 100
)

## ----installation, eval=FALSE-------------------------------------------------
#  if(!requireNamespace('BiocManager', quietly = TRUE))
#    install.packages('BiocManager')
#  BiocManager::install("cogeqc")

## ----load_package, message=FALSE----------------------------------------------
# Load package after installation
library(cogeqc)

## ----get_maize_genomes--------------------------------------------------------
# Retrieve genome stats for maize twice - once by taxon name and once by
# NCBI Taxonomy ID - and check that both queries give the same result.
# `maize_stats` is reused by later chunks, so its name must stay unchanged.
# NOTE(review): get_genome_stats() presumably queries NCBI over the network,
# so the output may change over time - confirm.

# Example 1: get stats for all maize genomes using taxon name
maize_stats <- get_genome_stats(taxon = "Zea mays")
# Preview the first rows, then show the structure of the returned object
head(maize_stats)
str(maize_stats)

# Example 2: get stats for all maize genomes using NCBI Taxonomy ID
maize_stats2 <- get_genome_stats(taxon = 4577)

# Check that both queries returned exactly the same object
# (auto-prints TRUE or FALSE)
identical(maize_stats, maize_stats2)

## ----get_maize_genomes_with_filters-------------------------------------------
# Get chromosome-scale maize genomes with annotation
## Create list of filters
# Each element is a named filter with a character value; note that "true"
# is passed as a string, not as the logical TRUE.
# NOTE(review): the `filters.*` names presumably mirror query parameters of
# the underlying NCBI API - confirm against the get_genome_stats() docs.
filt <- list(
    filters.has_annotation = "true",
    filters.assembly_level = "chromosome"
)
# Print the filter list for inspection
filt

## Obtain data
# Same taxon query as before, restricted by the filters defined above
filtered_maize_genomes <- get_genome_stats(taxon = "Zea mays", filters = filt)
# Show the dimensions of the filtered result
dim(filtered_maize_genomes)

## -----------------------------------------------------------------------------
# List the column names of the data frame of stats for maize genomes
# retrieved from NCBI.
# NOTE(review): a user-supplied stats data frame presumably has to reuse
# these column names - confirm against the compare_genome_stats() docs.
names(maize_stats)

# Simulate the stats of a user-assembled maize genome as a one-row data
# frame: an accession label plus three numeric summary statistics
my_stats <- data.frame(
    accession = "my_lovely_maize",
    sequence_length = 2.4e9,
    gene_count_total = 50000,
    CC_ratio = 2
)

# Compare the simulated genome (`my_stats`) against the maize genome stats
# retrieved from NCBI (`maize_stats`); the result is auto-printed
compare_genome_stats(ncbi_stats = maize_stats, user_stats = my_stats)

## ----plot_genome_stats, fig.width=10, fig.height=5----------------------------
# Summarize genome stats in a plot (NCBI maize genomes only)
plot_genome_stats(ncbi_stats = maize_stats)

## ----plot_genome_stats_with_user_stats, fig.width=10, fig.height=5------------
# Same plot, additionally passing the simulated genome as `user_stats`
# NOTE(review): presumably this highlights the user genome against the
# NCBI genomes - confirm against the plot_genome_stats() docs.
plot_genome_stats(ncbi_stats = maize_stats, user_stats = my_stats)

## ----run_busco, eval=FALSE----------------------------------------------------
#  # Path to FASTA file
#  sequence <- system.file("extdata", "Hse_subset.fa", package = "cogeqc")
#  
#  # Path to directory where BUSCO datasets will be stored
#  download_path <- paste0(tempdir(), "/datasets")
#  
#  # Run BUSCO if it is installed
#  if(busco_is_installed()) {
#    run_busco(sequence, outlabel = "Hse", mode = "genome",
#              lineage = "burkholderiales_odb10",
#              outpath = tempdir(), download_path = download_path)
#  }

## -----------------------------------------------------------------------------
# Path to output directory: the `extdata` folder of the installed cogeqc
# package (system.file() returns "" if it cannot be found)
# NOTE(review): presumably example BUSCO output is bundled there - confirm.
output_dir <- system.file("extdata", package = "cogeqc")

# Read the BUSCO results found in that directory and print the summary
busco_summary <- read_busco(output_dir)
busco_summary

## -----------------------------------------------------------------------------
# Load the `batch_summary` example dataset into the workspace and print it;
# it is used below as the batch-mode input to plot_busco()
data(batch_summary)
batch_summary

## ----plot_busco, out.width = '100%'-------------------------------------------
# Plot BUSCO summaries: first the one read with read_busco(), then the
# `batch_summary` example dataset

# Single FASTA file - Ostreococcus tauri
plot_busco(busco_summary)

# Batch mode - Herbaspirillum seropedicae and H. rubrisubalbicans
plot_busco(batch_summary)

## ----session_info-------------------------------------------------------------
# Print details of the R session (via the sessioninfo package) so the
# environment that produced this output is documented
sessioninfo::session_info()