## ----include = FALSE---------------------------------------------------------- library(knitr) knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = FALSE, cache = TRUE ) ## ----install, eval=FALSE------------------------------------------------------ # BiocManager::install("MGnifyR") ## ----load_package------------------------------------------------------------- # library(MGnifyR) ## ----create_client------------------------------------------------------------ # mg <- MgnifyClient() # mg ## ----create_client_passwd, eval=FALSE----------------------------------------- # mg <- MgnifyClient( # username = "Webin-username", password = "your-password", useCache = TRUE) ## ----search_studies----------------------------------------------------------- # northpolar <- doQuery( # mg, "samples", latitude_gte=60.0, experiment_type="amplicon", # biome_name="Soil", instrument_platform = "Illumina", max.hits = 10) # # head(northpolar) ## ----search_studies_accession------------------------------------------------- # study_samples <- doQuery(mg, "studies", accession="MGYS00002891") # # head(study_samples) ## ----convert_to_analyses------------------------------------------------------ # analyses_accessions <- searchAnalysis( # mg, type="studies", accession = study_samples$accession) # # head(analyses_accessions) ## ----get_metadata------------------------------------------------------------- # analyses_metadata <- getMetadata(mg, analyses_accessions) # # head(analyses_metadata) ## ----filter_metadata---------------------------------------------------------- # known_depths <- analyses_metadata[ # !is.na(as.numeric(analyses_metadata$sample_depth)), ] # # How many are left? # dim(known_depths) ## ----get_treese--------------------------------------------------------------- # tse <- getResult(mg, accession = analyses_accessions, get.func = FALSE) # # tse ## ----calculate_diversity------------------------------------------------------ # library(mia) # # tse <- estimateDiversity(tse, index = "shannon") # # library(scater) # # plotColData(tse, "shannon", x = "sample_geo.loc.name") ## ----plot_abundance----------------------------------------------------------- # library(miaViz) # # plotAbundance(tse[!is.na( rowData(tse)[["Kingdom"]] ), ], rank = "Kingdom") ## ----to_phyloseq-------------------------------------------------------------- # pseq <- makePhyloseqFromTreeSE(tse) # pseq ## ----get_analyses------------------------------------------------------------- # soil <- searchAnalysis(mg, "studies", "MGYS00001447") # human <- searchAnalysis(mg, "studies", "MGYS00001442") # marine <- searchAnalysis(mg, "studies", "MGYS00001282") # # # Combine analyses # all_accessions <- c(soil, human, marine) # # head(all_accessions) ## ----get_new_metadata--------------------------------------------------------- # full_metadata <- getMetadata(mg, all_accessions) # # colnames(full_metadata) # head(full_metadata) ## ----full_metatdata_explore--------------------------------------------------- # # Load ggplot2 # library(ggplot2) # # #Distribution of sample source material: # table(full_metadata$`sample_environment-material`) # # #What sequencing machine(s) were used? # table(full_metadata$`sample_instrument model`) # # # Boxplot of raw read counts: # ggplot( # full_metadata, aes(x=study_accession, y=log( # as.numeric(`analysis_Submitted nucleotide sequences`)))) + # geom_boxplot(aes(group=study_accession)) + # theme_bw() + # ylab("log(submitted reads)") ## ----get_mae------------------------------------------------------------------ # mae <- getResult(mg, all_accessions, bulk.dl = TRUE) # # mae ## ----mae_access--------------------------------------------------------------- # mae[[2]] ## ----pcoa--------------------------------------------------------------------- # # Apply relative transformation # mae[[1]] <- transformAssay(mae[[1]], method = "relabundance") # # Perform PCoA # mae[[1]] <- runMDS( # mae[[1]], assay.type = "relabundance", # FUN = vegan::vegdist, method = "bray") # # Plot # plotReducedDim(mae[[1]], "MDS", colour_by = "sample_environment.feature") ## ----fetch_data--------------------------------------------------------------- # kegg <- getData( # mg, type = "kegg-modules", accession = "MGYA00642773", # accession.type = "analyses") # # head(kegg) ## ----get_download_urls-------------------------------------------------------- # # Find list of available downloads # dl_urls <- searchFile( # mg, full_metadata$analysis_accession, type = "analyses") # # # Filter table # target_urls <- dl_urls[ # dl_urls$attributes.description.label == "Predicted CDS with annotation", ] # # head(target_urls) ## ----list_descriptions-------------------------------------------------------- # table(dl_urls$attributes.description.label) ## ----get_genome_urls---------------------------------------------------------- # genome_urls <- searchFile(mg, "MGYG000433953", type = "genomes") # # genome_urls[ , c("id", "attributes.file.format.name", "download_url")] ## ----get_files---------------------------------------------------------------- # # Just select a single file from the target_urls list for demonstration. # # # Default behavior - use local cache. # cached_location1 = getFile(mg, target_urls$download_url[[1]]) # # # Specifying a file # cached_location2 <- getFile( # mg, target_urls$download_url[[1]]) # # cached_location <- c(cached_location1, cached_location2) # # # Where are the files? # cached_location ## ----simple_parse_function---------------------------------------------------- # library(Biostrings) # # # Simple function to a count of unique sequences matching PFAM amoC/mmoC motif # getAmoCseqs <- function(fname){ # sequences <- readAAStringSet(fname) # tgtvec <- grepl("PF04896", names(sequences)) # as.data.frame(as.list(table(as.character(sequences[tgtvec])))) # } ## ----download_with_read------------------------------------------------------- # # Just download a single accession for demonstration, specifying a read_function # amoC_seq_counts <- getFile( # mg, target_urls$download_url[[1]], read_func = getAmoCseqs) # # amoC_seq_counts ## ----session_info------------------------------------------------------------- # sessionInfo()