## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk[["set"]]( collapse = TRUE, comment = "#>" # fig.width = 5L, # fig.height = 5L ) ## ----message=FALSE, warning=FALSE--------------------------------------------- library(COTAN) library(zeallot) library(rlang) library(data.table) library(Rtsne) library(qpdf) library(GEOquery) library(ComplexHeatmap) options(parallelly.fork.enable = TRUE) ## ----eval=TRUE, include=TRUE-------------------------------------------------- dataDir <- tempdir() GEO <- "GSM2861514" fName <- "GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz" dataSetFile <- file.path(dataDir, GEO, fName) dir.create(file.path(dataDir, GEO), showWarnings = FALSE) if (!file.exists(dataSetFile)) { getGEOSuppFiles(GEO, makeDirectory = TRUE, baseDir = dataDir, fetch_files = TRUE, filter_regex = fName) } sample.dataset <- read.csv(dataSetFile, sep = "\t", row.names = 1L) ## ----------------------------------------------------------------------------- outDir <- dataDir # Log-level 2 was chosen to showcase better how the package works # In normal usage a level of 0 or 1 is more appropriate setLoggingLevel(2L) # This file will contain all the logs produced by the package # as if at the highest logging level setLoggingFile(file.path(outDir, "vignette_v2.log")) message("COTAN uses the `torch` library when asked to `optimizeForSpeed`") message("Run the command 'options(COTAN.UseTorch = FALSE)'", " in your session to disable `torch` completely!") # this command does check whether the torch library is properly installed c(useTorch, deviceStr) %<-% COTAN:::canUseTorch(TRUE, "cuda") if (useTorch) { message("The `torch` library is available and ready to use") if (deviceStr == "cuda") { message("The `torch` library can use the `CUDA` GPU") } else { message("The `torch` library can only use the CPU") message("Please ensure you have the `OpenBLAS` libraries", " installed on the system") } } rm(useTorch, deviceStr) ## ----------------------------------------------------------------------------- cond <- "mouse_cortex_E17.5" obj <- COTAN(raw = sample.dataset) obj <- initializeMetaDataset(obj, GEO = GEO, sequencingMethod = "Drop_seq", sampleCondition = cond) logThis(paste0("Condition ", getMetadataElement(obj, datasetTags()[["cond"]])), logLevel = 1L) ## ----------------------------------------------------------------------------- plot(ECDPlot(obj)) ## ----------------------------------------------------------------------------- plot(cellSizePlot(obj)) ## ----------------------------------------------------------------------------- plot(genesSizePlot(obj)) ## ----------------------------------------------------------------------------- plot(scatterPlot(obj)) ## ----------------------------------------------------------------------------- cellsSizeThr <- 6000L obj <- addElementToMetaDataset(obj, "Cells size threshold", cellsSizeThr) cells_to_rem <- getCells(obj)[getCellsSize(obj) > cellsSizeThr] obj <- dropGenesCells(obj, cells = cells_to_rem) plot(cellSizePlot(obj)) ## ----------------------------------------------------------------------------- genesSizeHighThr <- 3000L obj <- addElementToMetaDataset(obj, "Num genes high threshold", genesSizeHighThr) cells_to_rem <- getCells(obj)[getNumExpressedGenes(obj) > genesSizeHighThr] obj <- dropGenesCells(obj, cells = cells_to_rem) plot(genesSizePlot(obj)) ## ----------------------------------------------------------------------------- genesSizeLowThr <- 500L obj <- addElementToMetaDataset(obj, "Num genes low threshold", genesSizeLowThr) cells_to_rem <- getCells(obj)[getNumExpressedGenes(obj) < genesSizeLowThr] obj <- dropGenesCells(obj, cells = cells_to_rem) plot(genesSizePlot(obj)) ## ----------------------------------------------------------------------------- c(mitPlot, mitSizes) %<-% mitochondrialPercentagePlot(obj, genePrefix = "^Mt") plot(mitPlot) ## ----------------------------------------------------------------------------- mitPercThr <- 1.0 obj <- addElementToMetaDataset(obj, "Mitoc. perc. threshold", mitPercThr) cells_to_rem <- rownames(mitSizes)[mitSizes[["mit.percentage"]] > mitPercThr] obj <- dropGenesCells(obj, cells = cells_to_rem) c(mitPlot, mitSizes) %<-% mitochondrialPercentagePlot(obj, genePrefix = "^Mt") plot(mitPlot) ## ----------------------------------------------------------------------------- genes_to_rem <- getGenes(obj)[grep("^Mt", getGenes(obj))] cells_to_rem <- getCells(obj)[which(getCellsSize(obj) == 0L)] obj <- dropGenesCells(obj, genes_to_rem, cells_to_rem) ## ----------------------------------------------------------------------------- logThis(paste("n cells", getNumCells(obj)), logLevel = 1L) ## ----------------------------------------------------------------------------- obj <- addElementToMetaDataset(obj, "Num drop B group", 0) obj <- clean(obj) c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(obj) plot(pcaCellsPlot) ## ----------------------------------------------------------------------------- plot(genesPlot) ## ----eval=TRUE, include=TRUE-------------------------------------------------- cells_to_rem <- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"] obj <- dropGenesCells(obj, cells = cells_to_rem) obj <- addElementToMetaDataset(obj, "Num drop B group", 1) obj <- clean(obj) c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(obj) plot(pcaCellsPlot) ## ----------------------------------------------------------------------------- plot(UDEPlot) ## ----------------------------------------------------------------------------- plot(nuPlot) ## ----------------------------------------------------------------------------- plot(zoomedNuPlot) ## ----------------------------------------------------------------------------- UDELowThr <- 0.30 obj <- addElementToMetaDataset(obj, "Low UDE cells' threshold", UDELowThr) obj <- addElementToMetaDataset(obj, "Num drop B group", 2) cells_to_rem <- getCells(obj)[getNu(obj) < UDELowThr] obj <- dropGenesCells(obj, cells = cells_to_rem) ## ----------------------------------------------------------------------------- obj <- clean(obj) c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(obj) plot(pcaCellsPlot) ## ----------------------------------------------------------------------------- plot(scatterPlot(obj)) ## ----------------------------------------------------------------------------- logThis(paste("n cells", getNumCells(obj)), logLevel = 1L) ## ----------------------------------------------------------------------------- obj <- proceedToCoex(obj, calcCoex = TRUE, optimizeForSpeed = TRUE, cores = 6L, deviceStr = "cuda", saveObj = FALSE, outDir = outDir) ## ----eval=FALSE, include=TRUE------------------------------------------------- # # saving the structure # saveRDS(obj, file = file.path(outDir, paste0(cond, ".cotan.RDS"))) ## ----eval=FALSE, include=TRUE------------------------------------------------- # obj2 <- automaticCOTANObjectCreation( # raw = sample.dataset, # GEO = GEO, # sequencingMethod = "Drop_seq", # sampleCondition = cond, # calcCoex = TRUE, cores = 6L, optimizeForSpeed = TRUE, # saveObj = TRUE, outDir = outDir) ## ----------------------------------------------------------------------------- gdiDF <- calculateGDI(obj) head(gdiDF) # This will store only the $GDI column obj <- storeGDI(obj, genesGDI = gdiDF) ## ----------------------------------------------------------------------------- genesList <- list( "NPGs" = c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"), "PNGs" = c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"), "hk" = c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a", "Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1", "Tars", "Amacr") ) GDIPlot(obj, cond = cond, genes = genesList, GDIThreshold = 1.40) ## ----------------------------------------------------------------------------- plot(heatmapPlot(obj, genesLists = genesList)) ## ----------------------------------------------------------------------------- plot(genesHeatmapPlot(obj, primaryMarkers = c("Satb2", "Bcl11b", "Vim", "Hes1"), pValueThreshold = 0.001, symmetric = TRUE)) ## ----eval=FALSE, include=TRUE------------------------------------------------- # plot(genesHeatmapPlot(obj, primaryMarkers = c("Satb2", "Bcl11b", "Fezf2"), # secondaryMarkers = c("Gabra3", "Meg3", "Cux1", "Neurod6"), # pValueThreshold = 0.001, symmetric = FALSE)) ## ----------------------------------------------------------------------------- print("Contingency Tables:") contingencyTables(obj, g1 = "Satb2", g2 = "Bcl11b") print("Corresponding Coex") getGenesCoex(obj)["Satb2", "Bcl11b"] ## ----------------------------------------------------------------------------- # For the whole matrix coex <- getGenesCoex(obj, zeroDiagonal = FALSE) coex[1000L:1005L, 1000L:1005L] ## ----------------------------------------------------------------------------- # For a partial matrix coex <- getGenesCoex(obj, genes = c("Satb2", "Bcl11b", "Fezf2")) coex[1000L:1005L, ] ## ----eval=TRUE, include=TRUE-------------------------------------------------- layersGenes <- list( "L1" = c("Reln", "Lhx5"), "L2/3" = c("Satb2", "Cux1"), "L4" = c("Rorb", "Sox5"), "L5/6" = c("Bcl11b", "Fezf2"), "Prog" = c("Vim", "Hes1") ) c(gSpace, eigPlot, pcaGenesClDF, treePlot) %<-% establishGenesClusters(obj, groupMarkers = layersGenes, numGenesPerMarker = 25L, kCuts = 5L) plot(eigPlot) ## ----eval=TRUE, include=TRUE-------------------------------------------------- plot(treePlot) ## ----eval=TRUE, include=TRUE-------------------------------------------------- colSelection <- vapply(pcaGenesClDF, is.numeric, logical(1L)) genesUmapPl <- UMAPPlot(pcaGenesClDF[, colSelection, drop = FALSE], clusters = getColumnFromDF(pcaGenesClDF, "hclust"), elements = layersGenes, title = "Genes' clusters UMAP Plot", numNeighbors = 32L, minPointsDist = 0.25) plot(genesUmapPl) ## ----eval=FALSE, include=TRUE------------------------------------------------- # # This code is a little too computationally heavy to be used in an example # # So we stored the result and we can load it in the next section # # # default constructed checker is OK # advChecker <- new("AdvancedGDIUniformityCheck") # # c(splitClusters, splitCoexDF) %<-% # cellsUniformClustering(obj, initialResolution = 0.8, checker = advChecker, # optimizeForSpeed = TRUE, deviceStr = "cuda", # cores = 6L, saveObj = TRUE, outDir = outDir) # # obj <- addClusterization(obj, clName = "split", # clusters = splitClusters, coexDF = splitCoexDF) # # table(splitClusters) ## ----eval=TRUE, include=TRUE-------------------------------------------------- data("vignette.split.clusters", package = "COTAN") splitClusters <- vignette.split.clusters[getCells(obj)] splitCoexDF <- DEAOnClusters(obj, clusters = splitClusters) obj <- addClusterization(obj, clName = "split", clusters = splitClusters, coexDF = splitCoexDF, override = FALSE) ## ----eval=TRUE, include=TRUE-------------------------------------------------- c(summaryData, summaryPlot) %<-% clustersSummaryPlot(obj, clName = "split", plotTitle = "split summary") summaryData ## ----eval=TRUE, include=TRUE-------------------------------------------------- plot(summaryPlot) ## ----eval=TRUE, include=TRUE-------------------------------------------------- c(splitHeatmap, scoreDF, pValueDF) %<-% clustersMarkersHeatmapPlot(obj, groupMarkers = layersGenes, clName = "split", kCuts = 5L, adjustmentMethod = "holm") draw(splitHeatmap) ## ----eval=FALSE, include=TRUE------------------------------------------------- # c(mergedClusters, mergedCoexDF) %<-% # mergeUniformCellsClusters(obj, clusters = splitClusters, # checkers = advChecker, # optimizeForSpeed = TRUE, deviceStr = "cuda", # cores = 6L, saveObj = TRUE, outDir = outDir) # # # merges are: # # 1 <- 06 + 07 # # 2 <- '-1' + 08 + 11 # # 3 <- 09 + 10 # # 4 <- 01 # # 5 <- 02 # # 6 <- 12 + 13 # # 7 <- 03 + 05 # # 8 <- 04 # # obj <- addClusterization(obj, clName = "merge", override = TRUE, # clusters = mergedClusters, coexDF = mergedCoexDF) # # table(mergedClusters) ## ----eval=FALSE, include=TRUE------------------------------------------------- # # checkersList <- list(advChecker, # shiftCheckerThresholds(advChecker, 0.01), # shiftCheckerThresholds(advChecker, 0.03)) # # prevCheckRes <- data.frame() # # # In this case we want to re-use the already calculated merge checks # # Se we reload them from the output files. This, along a similar facility for # # the split method, is also useful in the cases the execution is interrupted # # prematurely... # # # if (TRUE) { # # read from the last file among those named all_check_results_XX.csv # mergeDir <- file.path(outDir, cond, "leafs_merge") # resFiles <- list.files(path = mergeDir, pattern = "all_check_results_.*csv", # full.names = TRUE) # # prevCheckRes <- read.csv(resFiles[length(resFiles)], # header = TRUE, row.names = 1L) # } # # c(mergedClusters2, mergedCoexDF2) %<-% # mergeUniformCellsClusters(obj, clusters = splitClusters, # checkers = checkersList, # allCheckResults = prevCheckRes, # optimizeForSpeed = TRUE, deviceStr = "cuda", # cores = 6L, saveObj = TRUE, outDir = outDir) # # # merges are: # # 1 <- '-1' + 06 + 09 # # 2 <- 07 # # 3 <- 03 + 04 + 05 + 10 + 13 # # 4 <- 12 # # 5 <- 01 + 08 + 11 # # 6 <- 02 # # obj <- addClusterization(obj, clName = "merge2", override = TRUE, # clusters = mergedClusters2, coexDF = mergedCoexDF2) # # table(mergedClusters2) ## ----eval=TRUE, include=TRUE-------------------------------------------------- data("vignette.merge.clusters", package = "COTAN") mergedClusters <- vignette.merge.clusters[getCells(obj)] mergedCoexDF <- DEAOnClusters(obj, clusters = mergedClusters) obj <- addClusterization(obj, clName = "merge", clusters = mergedClusters, coexDF = mergedCoexDF, override = FALSE) data("vignette.merge2.clusters", package = "COTAN") mergedClusters2 <- vignette.merge2.clusters[getCells(obj)] mergedCoexDF2 <- DEAOnClusters(obj, clusters = mergedClusters2) obj <- addClusterization(obj, clName = "merge2", clusters = mergedClusters2, coexDF = mergedCoexDF2, override = FALSE) ## ----eval=TRUE, include=TRUE-------------------------------------------------- c(mergeHeatmap, ...) %<-% clustersMarkersHeatmapPlot(obj, clName = "merge", condNameList = "split", conditionsList = list(splitClusters)) draw(mergeHeatmap) c(mergeHeatmap2, ...) %<-% clustersMarkersHeatmapPlot(obj, clName = "merge2", condNameList = "split", conditionsList = list(splitClusters)) draw(mergeHeatmap2) ## ----------------------------------------------------------------------------- c(umapPlot, cellsPCA) %<-% cellsUMAPPlot(obj, clName = "merge2", dataMethod = "AdjBinarized", genesSel = "HGDI", colors = NULL, numNeighbors = 15L, minPointsDist = 0.2) plot(umapPlot) ## ----------------------------------------------------------------------------- if (file.exists(file.path(outDir, paste0(cond, ".cotan.RDS")))) { #Delete file if it exists file.remove(file.path(outDir, paste0(cond, ".cotan.RDS"))) } if (file.exists(file.path(outDir, paste0(cond, "_times.csv")))) { #Delete file if it exists file.remove(file.path(outDir, paste0(cond, "_times.csv"))) } if (dir.exists(file.path(outDir, cond))) { unlink(file.path(outDir, cond), recursive = TRUE) } if (dir.exists(file.path(outDir, GEO))) { unlink(file.path(outDir, GEO), recursive = TRUE) } # stop logging to file setLoggingFile("") file.remove(file.path(outDir, "vignette_v2.log")) options(parallelly.fork.enable = FALSE) ## ----------------------------------------------------------------------------- Sys.time() ## ----------------------------------------------------------------------------- sessionInfo()