## ----knitr options, echo = FALSE, include = FALSE----------------------------- plotDev <- if (requireNamespace("ragg", quietly = TRUE)) { "ragg_png" } else if (requireNamespace("Cairo", quietly = TRUE)) { "cairo_png" } else { "png" } knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7L, fig.height = 5L, dev = plotDev, fig.crop = FALSE, dpi = 100L ) ## ----preamble, message=FALSE, warning=FALSE----------------------------------- library(COTAN) library(zeallot) # necessary to solve precedence of overloads conflicted::conflict_prefer("%<-%", "zeallot") # enable multi-processing (not on Windows) prevOptState <- options(parallelly.fork.enable = TRUE) ## ----setup-and-log------------------------------------------------------------ dataDir <- file.path(tempdir(), "COTAN_vignette_data") dir.create(dataDir, recursive = TRUE, showWarnings = FALSE) print(dataDir) outDir <- dataDir # Log-level 2 was chosen to showcase better how the package works # In normal usage a level of 0 or 1 is more appropriate setLoggingLevel(2L) # This file will contain all the logs produced by the package # as if at the highest logging level setLoggingFile(file.path(outDir, "vignette_genes_clustering.log")) ## ----download-data------------------------------------------------------------ GEO <- "GSM2861514" dir.create(file.path(dataDir, GEO), showWarnings = FALSE) datasetFileName <- file.path(dataDir, GEO, "GSM2861514_E175_All_Cells_DGE.txt.gz") # retries up to 5 times to get the dataset attempts <- 0L maxAttempts <- 5L ok <- FALSE while (attempts < maxAttempts && !ok) { attempts <- attempts + 1L if (!file.exists(datasetFileName)) { res <- try( GEOquery::getGEOSuppFiles( GEO = GEO, makeDirectory = TRUE, baseDir = dataDir, filter_regex = base::basename(datasetFileName), fetch_files = TRUE ), silent = TRUE ) } ok <- file.exists(datasetFileName) if (!ok && attempts < maxAttempts) { Sys.sleep(1) } } assertthat::assert_that( ok, msg = paste0( "Failed to retrieve file '", datasetFileName, "' after ", maxAttempts, " attempts." ) ) rawDataset <- read.csv(datasetFileName, sep = "\t", row.names = 1L) print(dim(rawDataset)) ## ----create-cotan-object------------------------------------------------------ cond <- "mouse_cortex_E17.5" obj <- COTAN(raw = rawDataset) obj <- initializeMetaDataset( obj, GEO = GEO, sequencingMethod = "Drop_seq", sampleCondition = cond ) logThis(paste0("Condition ", getMetadataElement(obj, datasetTags()[["cond"]])), logLevel = 1L) ## ----add-origin-to-the-cotan-object------------------------------------------- # mark cells origin data(vignette.cells.origin) head(vignette.cells.origin, 18) obj <- addCondition(obj, condName = "origin", conditions = vignette.cells.origin) rm(vignette.cells.origin) ## ----drop-low-quality-cells--------------------------------------------------- # use previously established results to determine # which cells were dropped in the cleaning stage data(vignette.split.clusters) cellsToDrop <- getCells(obj)[!(getCells(obj) %in% names(vignette.split.clusters))] obj <- dropGenesCells(obj, cells = cellsToDrop) # Log the remaining number of cells logThis(paste("n cells", getNumCells(obj)), logLevel = 1L) table(getCondition(obj, condName = "origin")) rm(vignette.split.clusters) ## ----calculate-and-store-coex-matrix------------------------------------------ obj <- proceedToCoex( obj, calcCoex = TRUE, optimizeForSpeed = TRUE, cores = 3L, deviceStr = "cuda" ) ## ----contingency-tables-and-coex-for-gene-pair-------------------------------- cat("Contingency Tables:") print(contingencyTables(obj, g1 = "Satb2", g2 = "Bcl11b")) cat(paste("Corresponding COEX value:", getGenesCoex(obj, zeroDiagonal = FALSE)["Satb2", "Bcl11b"])) ## ----display-small-chunk-of-coex-matrix--------------------------------------- # Extract the whole matrix coex <- getGenesCoex(obj, zeroDiagonal = FALSE) coex[1000L:1005L, 1000L:1005L] ## ----get-coex-sub-matrix------------------------------------------------------ # For a partial matrix coex_sm <- getGenesCoex(obj, genes = c("Satb2", "Bcl11b", "Fezf2"), zeroDiagonal = FALSE) coex_sm[2001:2010, ] ## ----calculate-and-store-gdi-------------------------------------------------- gdiDF <- calculateGDI(obj, cores = 3L) # Show the most *differentially expressed* genes perm <- order(gdiDF[["GDI"]], decreasing = TRUE) head(gdiDF[perm, ], n = 10) # This will store only the $GDI column obj <- storeGDI(obj, genesGDI = gdiDF) ## ----genes-list-definition---------------------------------------------------- genesLists <- list( # Neural Progenitor Genes "NPGs" = c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"), # Pan Neural Genes "PNGs" = c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"), # House Keeping "hk" = c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a", "Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1", "Tars", "Amacr") ) ## ----gdi-plot-with-marked-genes----------------------------------------------- GDIPlot(obj, genes = genesLists, GDIThreshold = 1.40) ## ----save-cotan-obj, eval=FALSE----------------------------------------------- # # saving the structure # saveRDS(obj, file = file.path(outDir, paste0(cond, ".cotan.RDS"))) ## ----coex-heatmap-plot-for-selected-genes------------------------------------- plot(heatmapPlot(obj, genesLists = genesLists, sets = 1L:3L, cores = 3L)) ## ----define-layers-specific-genes--------------------------------------------- layersGenes <- list( "L1" = c("Reln", "Lhx5"), "L2/3" = c("Cux1", "Satb2"), "L4" = c("Rorb", "Sox5"), "L5/6" = c("Bcl11b", "Fezf2"), "Prog" = c("Hes1", "Vim", "Dummy") ) ## ----coex-heatmap-plot-for-primary-marker-genes------------------------------- layersMarkers <- c("Satb2", "Bcl11b", "Vim", "Hes1") genesHeatmapPlot( obj, primaryMarkers = layersMarkers, pValueThreshold = 0.0001, symmetric = TRUE, cores = 3L ) ## ----coex-heatmap-plot-two-genes-sets, eval=FALSE, include=TRUE--------------- # genesHeatmapPlot( # obj, # primaryMarkers = c("Satb2", "Bcl11b", "Fezf2"), # secondaryMarkers = c("Gabra3", "Meg3", "Cux1", "Neurod6"), # pValueThreshold = 0.001, # symmetric = FALSE, # cores = 3L # ) ## ----establish-genes-clusters------------------------------------------------- c(gSpace, eigPlot, pcaGenesClDF, treePlot) %<-% establishGenesClusters( obj, groupMarkers = layersGenes, numGenesPerMarker = 25L, kCuts = 5L ) plot(eigPlot) ## ----genes-tree-plot---------------------------------------------------------- plot(treePlot) ## ----genes-umap-plot---------------------------------------------------------- colSelection <- vapply(pcaGenesClDF, is.numeric, logical(1L)) genesUmapPl <- UMAPPlot( pcaGenesClDF[, colSelection, drop = FALSE], clusters = getColumnFromDF(pcaGenesClDF, "hclust"), elements = layersGenes, title = "Genes' clusters UMAP Plot", numNeighbors = 32L, minPointsDist = 0.25 ) plot(genesUmapPl) ## ----final-clean-up----------------------------------------------------------- if (file.exists(file.path(outDir, paste0(cond, ".cotan.RDS")))) { # delete file if it exists file.remove(file.path(outDir, paste0(cond, ".cotan.RDS"))) } if (file.exists(file.path(outDir, paste0(cond, "_times.csv")))) { # delete file if it exists file.remove(file.path(outDir, paste0(cond, "_times.csv"))) } if (dir.exists(file.path(outDir, cond))) { unlink(file.path(outDir, cond), recursive = TRUE) } # if (dir.exists(file.path(outDir, GEO))) { # unlink(file.path(outDir, GEO), recursive = TRUE) # } # stop logging to file setLoggingFile("") file.remove(file.path(outDir, "vignette_genes_clustering.log")) options(prevOptState) ## ----closure------------------------------------------------------------------ Sys.time() sessionInfo()