## ----eval = FALSE-------------------------------------------------------------
# if (!requireNamespace("devtools", quietly = TRUE))
#   install.packages("devtools")
# devtools::install_github("wejlab/MetaScope")

## ----eval = FALSE-------------------------------------------------------------
# suppressPackageStartupMessages({
#   library(MetaScope)
#   library(dplyr)
# })

## ----eval = FALSE-------------------------------------------------------------
# NCBI_key <- "<Your key here>"
# options("ENTREZ_KEY" = NCBI_key)

## ----eval = FALSE, message = FALSE--------------------------------------------
# # Get barcode, index, and read data locations
# barcodePath <-
#   system.file("extdata", "barcodes.txt", package = "MetaScope")
# 
# indexPath <- system.file("extdata", "virus_example_index.fastq",
#                          package = "MetaScope")
# readPath <-
#   system.file("extdata", "virus_example.fastq", package = "MetaScope")
# 
# # Get barcode, index, and read data locations
# demult <-
#   meta_demultiplex(barcodePath,
#                    indexPath,
#                    readPath,
#                    rcBarcodes = FALSE,
#                    hammingDist = 2,
#                    location = tempfile())
# demult

## ----eval = FALSE-------------------------------------------------------------
# # For RefSeq data, set reference = TRUE and representative = FALSE
# download_refseq('bacteria', reference = TRUE, representative = FALSE,
#                 out_dir = NULL, compress = TRUE, patho_out = FALSE,
#                 caching = TRUE)
# 
# # If the RefSeq assortment is too limited, set representative = TRUE
# download_refseq('bacteria', reference = TRUE, representative = FALSE,
#                 out_dir = NULL, compress = TRUE, patho_out = FALSE,
#                 caching = TRUE)

## ----eval = FALSE-------------------------------------------------------------
# 
# for (taxa in c('bacteria', 'viruses', 'fungi')) {
#   download_refseq(taxa, reference = TRUE, representative = FALSE,
#                 out_dir = NULL, compress = TRUE, patho_out = FALSE,
#                 caching = TRUE)
# }

## ----eval = FALSE-------------------------------------------------------------
# download_refseq("Homo sapiens", reference = TRUE, representative = FALSE,
#                 out_dir = NULL, compress = TRUE, patho_out = FALSE,
#                 caching = TRUE)

## ----eval = FALSE-------------------------------------------------------------
# head(MetaScope:::taxonomy_table)

## ----eval = FALSE-------------------------------------------------------------
# all_staph_strains <- MetaScope:::taxonomy_table |>
#   dplyr::filter(genus == "Staphylococcus") |>
#   dplyr::pull(strain)
# 
# # Download representative genomes
# sapply(all_staph_strains,
#        function(strain) download_refseq(strain, reference = TRUE, representative = TRUE,
#                 out_dir = NULL, compress = TRUE, patho_out = FALSE,
#                 caching = TRUE))

## ----eval = FALSE-------------------------------------------------------------
# nThreads <- 1
# 
# # Example - target genome fungi
# mk_bowtie_index(ref_dir = "fungi", lib_dir = "all_indices",
#                 lib_name = "fungi", threads = nThreads, overwrite = TRUE)

## ----eval = FALSE-------------------------------------------------------------
# # If unpaired, see documentation
# readPath1 <- "reads1.paired.fastq.gz"
# readPath2 <- "reads2.paired.fastq.gz"
# 
# # Change to your target library names
# targets <- c("fungi", "bacteria", "viruses")
# 
# alignment_directory <- "<where should alignments be output?>"
# 
# threads <- 8
# 
# # Usually expTag is a dynamically changing sample name
# expTag <- "sampleX"
# 
# # Location of alignment indices
# all_indices <- "<Location of alignment indices>"

## ----eval = FALSE-------------------------------------------------------------
# data("bt2_params")
# # Use 16S params instead if you have 16S data
# data("bt2_16S_params")
# 
# target_map <- align_target_bowtie(read1 = readPath1,
#                                   read2 = readPath2,
#                                   # Where are indices stored?
#                                   lib_dir = all_indices,
#                                   libs =  targets,
#                                   align_dir = alignment_directory,
#                                   align_file = expTag,
#                                   overwrite = TRUE,
#                                   threads = threads,
#                                   # Use 16S params if you have 16S data
#                                   bowtie2_options = bt2_params,
#                                   quiet = FALSE)

## ----eval = FALSE-------------------------------------------------------------
# data("align_details")
# 
# target_map <- align_target(read1 = readPath1,
#                            read2 = readPath2,
#                            lib_dir = all_indices,
#                            libs = targets,
#                            threads = threads,
#                            lib_dir = target_ref_temp,
#                            align_file =
#                              file.path(alignment_directory, expTag),
#                            subread_options = align_details,
#                            quiet = FALSE)

## ----eval = FALSE-------------------------------------------------------------
# # Change to your filter library name(s)
# filters <- c("homo_sapiens")
# 
# threads <- 8
# 
# output <- paste(paste0(alignment_directory, expTag), "filtered", sep = ".")

## ----eval = FALSE-------------------------------------------------------------
# data("bt2_params")
# 
# # target_map is the bam file output by the MetaAlign module
# final_map <- filter_host_bowtie(reads_bam = target_map,
#                                 lib_dir = all_indices,
#                                 libs = filters,
#                                 make_bam = FALSE,
#                                 output = output,
#                                 threads = threads,
#                                 bowtie2_options = bt2_params,
#                                 overwrite = TRUE,
#                                 quiet = FALSE,
#                                 bowtie2_options = bt2_params)

## ----eval = FALSE-------------------------------------------------------------
# data("align_details")
# 
# final_map <- filter_host(
#   reads_bam = target_map,
#   lib_dir = all_indices,
#   make_bam = FALSE,
#   libs = filters,
#   output = output,
#   threads = threads,
#   subread_options = align_details)

## ----eval = FALSE-------------------------------------------------------------
# your_ENTREZ_KEY <- "<Your key here>"
# outDir <- "<Your output directory>"
# 
# Sys.setenv(ENTREZ_KEY = your_ENTREZ_KEY)

## ----eval = FALSE-------------------------------------------------------------
# # NOTE: If 16S sample, use MetaAlign output
# input_file <- target_map
# 
# metascope_id(input_file,
#              input_type = "bam",
#              aligner = "bowtie2",
#              NCBI_key = your_ENTREZ_KEY,
#              num_species_plot = 15,
#              quiet = FALSE,
#              out_dir = outDir)

## ----eval = FALSE-------------------------------------------------------------
# # NOTE: If Metagenomics sample, use MetaFilter output
# input_file <- final_map
# 
# metascope_id(input_file,
#              input_type = "csv.gz",
#              aligner = "bowtie2",
#              NCBI_key = your_ENTREZ_KEY,
#              num_species_plot = 15,
#              quiet = FALSE,
#              out_dir = outDir)

## ----eval = FALSE-------------------------------------------------------------
# # NOTE: If 16S sample, use MetaAlign output
# input_file <- target_map
# 
# metascope_id(input_file,
#              input_type = "bam",
#              aligner = "subread",
#              NCBI_key = your_ENTREZ_KEY,
#              num_species_plot = 15,
#              quiet = FALSE,
#              out_dir = outDir)

## ----eval = FALSE-------------------------------------------------------------
# # NOTE: If Metagenomics sample, use MetaFilter output
# input_file <- final_map
# 
# metascope_id(input_file,
#              input_type = "csv.gz",
#              aligner = "subread",
#              NCBI_key = your_ENTREZ_KEY,
#              num_species_plot = 15,
#              quiet = FALSE,
#              out_dir = outDir)

## ----eval = FALSE-------------------------------------------------------------
# tempfolder <- tempfile()
# dir.create(tempfolder)
# 
# # Create three different samples
# samp_names <- c("X123", "X456", "X789")
# all_files <- file.path(tempfolder,
#                        paste0(samp_names, ".csv"))
# 
# create_IDcsv <- function (out_file) {
#   final_taxids <- c("273036", "418127", "11234")
#   final_genomes <- c(
#     "Staphylococcus aureus RF122, complete sequence",
#     "Staphylococcus aureus subsp. aureus Mu3, complete sequence",
#     "Measles virus, complete genome")
#   best_hit <- sample(seq(100, 1050), 3)
#   proportion <- best_hit/sum(best_hit) |> round(2)
#   EMreads <- best_hit + round(runif(3), 1)
#   EMprop <- proportion + 0.003
#   dplyr::tibble(TaxonomyID = final_taxids,
#                 Genome = final_genomes,
#                 read_count = best_hit, Proportion = proportion,
#                 EMreads = EMreads, EMProportion = EMprop) |>
#     dplyr::arrange(dplyr::desc(.data$read_count)) |>
#     utils::write.csv(file = out_file, row.names = FALSE)
#   message("Done!")
#   return(out_file)
# }
# out_files <- vapply(all_files, create_IDcsv, FUN.VALUE = character(1))
# 

## ----eval = FALSE-------------------------------------------------------------
# annot_dat <- file.path(tempfolder, "annot.csv")
# dplyr::tibble(Sample = samp_names, RSV = c("pos", "neg", "pos"),
#               month = c("March", "July", "Aug"),
#               yrsold = c(0.5, 0.6, 0.2)) |>
#   utils::write.csv(file = annot_dat,
#                    row.names = FALSE)

## ----eval = FALSE-------------------------------------------------------------
# outMAE <- convert_animalcules(meta_counts = out_files,
#                               annot_path = annot_dat,
#                               which_annot_col = "Sample",
#                               end_string = ".metascope_id.csv",
#                               qiime_biom_out = FALSE,
#                               NCBI_key = NULL)
# 
# unlink(tempfolder, recursive = TRUE)

## ----session info-------------------------------------------------------------
sessionInfo()