## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk[["set"]]( collapse = TRUE, comment = "#>" # fig.width = 5L, # fig.height = 5L ) ## ----message=FALSE, warning=FALSE--------------------------------------------- library(COTAN) library(zeallot) library(rlang) library(data.table) library(Rtsne) library(qpdf) library(GEOquery) options(parallelly.fork.enable = TRUE) ## ----eval=TRUE, include=TRUE-------------------------------------------------- dataDir <- tempdir() GEO <- "GSM2861514" fName <- "GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz" dataSetFile <- file.path(dataDir, GEO, fName) dir.create(file.path(dataDir, GEO), showWarnings = FALSE) if (!file.exists(dataSetFile)) { getGEOSuppFiles(GEO, makeDirectory = TRUE, baseDir = dataDir, fetch_files = TRUE, filter_regex = fName) } sample.dataset <- read.csv(dataSetFile, sep = "\t", row.names = 1L) ## ----------------------------------------------------------------------------- outDir <- dataDir # Log-level 2 was chosen to showcase better how the package works # In normal usage a level of 0 or 1 is more appropriate setLoggingLevel(2L) # This file will contain all the logs produced by the package # as if at the highest logging level setLoggingFile(file.path(outDir, "vignette_v2.log")) message("COTAN uses the `torch` library when asked to `optimizeForSpeed`") message("Run the command 'options(COTAN.UseTorch = FALSE)'", " in your session to disable `torch` completely!") # this command does check whether the torch library is properly installed c(useTorch, deviceStr) %<-% COTAN:::canUseTorch(TRUE, "cuda") if (useTorch) { message("The `torch` library is availble and ready to use") if (deviceStr == "cuda") { message("The `torch` library can use the `CUDA` GPU") } else { message("The `torch` library can only use the CPU") message("Please ensure you have the `OpenBLAS` libraries", " installed on the system") } } rm(useTorch, deviceStr) ## ----------------------------------------------------------------------------- cond <- "mouse_cortex_E17.5" obj <- COTAN(raw = sample.dataset) obj <- initializeMetaDataset(obj, GEO = GEO, sequencingMethod = "Drop_seq", sampleCondition = cond) logThis(paste0("Condition ", getMetadataElement(obj, datasetTags()[["cond"]])), logLevel = 1L) ## ----------------------------------------------------------------------------- ECDPlot(obj, yCut = 700L) ## ----------------------------------------------------------------------------- cellSizePlot(obj) ## ----------------------------------------------------------------------------- genesSizePlot(obj) ## ----------------------------------------------------------------------------- scatterPlot(obj) ## ----------------------------------------------------------------------------- cellsSizeThr <- 6000L obj <- addElementToMetaDataset(obj, "Cells size threshold", cellsSizeThr) cells_to_rem <- getCells(obj)[getCellsSize(obj) > cellsSizeThr] obj <- dropGenesCells(obj, cells = cells_to_rem) cellSizePlot(obj) ## ----------------------------------------------------------------------------- genesSizeHighThr <- 3000L obj <- addElementToMetaDataset(obj, "Num genes high threshold", genesSizeHighThr) cells_to_rem <- getCells(obj)[getNumExpressedGenes(obj) > genesSizeHighThr] obj <- dropGenesCells(obj, cells = cells_to_rem) genesSizePlot(obj) ## ----------------------------------------------------------------------------- genesSizeLowThr <- 500L obj <- addElementToMetaDataset(obj, "Num genes low threshold", genesSizeLowThr) cells_to_rem <- getCells(obj)[getNumExpressedGenes(obj) < genesSizeLowThr] obj <- dropGenesCells(obj, cells = cells_to_rem) genesSizePlot(obj) ## ----------------------------------------------------------------------------- c(mitPlot, mitSizes) %<-% mitochondrialPercentagePlot(obj, genePrefix = "^Mt") mitPlot ## ----------------------------------------------------------------------------- mitPercThr <- 1.0 obj <- addElementToMetaDataset(obj, "Mitoc. perc. threshold", mitPercThr) cells_to_rem <- rownames(mitSizes)[mitSizes[["mit.percentage"]] > mitPercThr] obj <- dropGenesCells(obj, cells = cells_to_rem) c(mitPlot, mitSizes) %<-% mitochondrialPercentagePlot(obj, genePrefix = "^Mt") plot(mitPlot) ## ----------------------------------------------------------------------------- genes_to_rem <- getGenes(obj)[grep("^Mt", getGenes(obj))] cells_to_rem <- getCells(obj)[which(getCellsSize(obj) == 0L)] obj <- dropGenesCells(obj, genes_to_rem, cells_to_rem) ## ----------------------------------------------------------------------------- logThis(paste("n cells", getNumCells(obj)), logLevel = 1L) ## ----------------------------------------------------------------------------- obj <- addElementToMetaDataset(obj, "Num drop B group", 0) obj <- clean(obj) c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(obj) pcaCellsPlot ## ----------------------------------------------------------------------------- genesPlot ## ----eval=TRUE, include=TRUE-------------------------------------------------- cells_to_rem <- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"] obj <- dropGenesCells(obj, cells = cells_to_rem) obj <- addElementToMetaDataset(obj, "Num drop B group", 1) obj <- clean(obj) c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(obj) pcaCellsPlot ## ----------------------------------------------------------------------------- UDEPlot ## ----------------------------------------------------------------------------- plot(nuPlot) ## ----------------------------------------------------------------------------- plot(zoomedNuPlot) ## ----------------------------------------------------------------------------- UDELowThr <- 0.30 obj <- addElementToMetaDataset(obj, "Low UDE cells' threshold", UDELowThr) obj <- addElementToMetaDataset(obj, "Num drop B group", 2) cells_to_rem <- getCells(obj)[getNu(obj) < UDELowThr] obj <- dropGenesCells(obj, cells = cells_to_rem) ## ----------------------------------------------------------------------------- obj <- clean(obj) c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(obj) plot(pcaCellsPlot) ## ----------------------------------------------------------------------------- scatterPlot(obj) ## ----------------------------------------------------------------------------- logThis(paste("n cells", getNumCells(obj)), logLevel = 1L) ## ----------------------------------------------------------------------------- obj <- proceedToCoex(obj, calcCoex = TRUE, optimizeForSpeed = TRUE, cores = 6L, saveObj = TRUE, outDir = outDir) ## ----eval=FALSE, include=TRUE------------------------------------------------- # # saving the structure # saveRDS(obj, file = file.path(outDir, paste0(cond, ".cotan.RDS"))) ## ----eval=FALSE, include=TRUE------------------------------------------------- # obj2 <- automaticCOTANObjectCreation( # raw = sample.dataset, # GEO = GEO, # sequencingMethod = "Drop_seq", # sampleCondition = cond, # calcCoex = TRUE, cores = 6L, optimizeForSpeed = TRUE, # saveObj = TRUE, outDir = outDir) ## ----------------------------------------------------------------------------- quantP <- calculateGDI(obj) head(quantP) obj <- storeGDI(obj, genesGDI = getColumnFromDF(quantP, "GDI")) ## ----------------------------------------------------------------------------- genesList <- list( "NPGs" = c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"), "PNGs" = c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"), "hk" = c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a", "Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1", "Tars", "Amacr") ) # needs to be recalculated after the changes in nu/dispersion above #obj <- calculateCoex(obj, actOnCells = FALSE) GDIPlot(obj, GDIIn = quantP, cond = cond, genes = genesList, GDIThreshold = 1.43) ## ----------------------------------------------------------------------------- print(cond) ## ----------------------------------------------------------------------------- heatmapPlot(genesLists = genesList, sets = (1L:3L), conditions = c(cond), dir = outDir) ## ----------------------------------------------------------------------------- genesHeatmapPlot(obj, primaryMarkers = c("Satb2", "Bcl11b", "Vim", "Hes1"), pValueThreshold = 0.001, symmetric = TRUE) ## ----eval=FALSE, include=TRUE------------------------------------------------- # genesHeatmapPlot(obj, primaryMarkers = c("Satb2", "Bcl11b", "Fezf2"), # secondaryMarkers = c("Gabra3", "Meg3", "Cux1", "Neurod6"), # pValueThreshold = 0.001, symmetric = FALSE) ## ----------------------------------------------------------------------------- c(observedCT, expectedCT) %<-% contingencyTables(obj, g1 = "Satb2", g2 = "Bcl11b") print("Observed CT") observedCT print("Expected CT") expectedCT print("Coex") getGenesCoex(obj)["Satb2", "Bcl11b"] ## ----------------------------------------------------------------------------- # For the whole matrix coex <- getGenesCoex(obj, zeroDiagonal = FALSE) coex[1000L:1005L, 1000L:1005L] ## ----------------------------------------------------------------------------- # For a partial matrix coex <- getGenesCoex(obj, genes = c("Satb2", "Bcl11b", "Fezf2")) coex[1000L:1005L, ] ## ----eval=TRUE, include=TRUE-------------------------------------------------- layersGenes <- list( "L1" = c("Reln", "Lhx5"), "L2/3" = c("Satb2", "Cux1"), "L4" = c("Rorb", "Sox5"), "L5/6" = c("Bcl11b", "Fezf2"), "Prog" = c("Vim", "Hes1") ) c(gSpace, eigPlot, pcaGenesClDF, treePlot) %<-% establishGenesClusters(obj, groupMarkers = layersGenes, numGenesPerMarker = 25L, kCuts = 5L) plot(eigPlot) ## ----eval=TRUE, include=TRUE-------------------------------------------------- plot(treePlot) ## ----eval=TRUE, include=TRUE-------------------------------------------------- genesUmapPl <- UMAPPlot(pcaGenesClDF[, sapply(pcaGenesClDF, is.numeric)], clusters = getColumnFromDF(pcaGenesClDF, "hclust"), elements = layersGenes, title = "Genes' clusters UMAP Plot", numNeighbors = 32, minPointsDist = 0.25) plot(genesUmapPl) ## ----eval=FALSE, include=TRUE------------------------------------------------- # c(splitClusters, splitCoexDF) %<-% # cellsUniformClustering(obj, GDIThreshold = 1.43, initialResolution = 0.8, # optimizeForSpeed = TRUE, cores = 6L, # saveObj = TRUE, outDir = outDir) # obj <- addClusterization(obj, clName = "split", # clusters = splitClusters, coexDF = splitCoexDF) ## ----eval=TRUE, include=TRUE-------------------------------------------------- data("vignette.split.clusters", package = "COTAN") splitClusters <- vignette.split.clusters[getCells(obj)] splitCoexDF <- DEAOnClusters(obj, clusters = splitClusters) obj <- addClusterization(obj, clName = "split", clusters = splitClusters, coexDF = splitCoexDF) ## ----eval=FALSE, include=TRUE------------------------------------------------- # c(mergeClusters, mergeCoexDF) %<-% # mergeUniformCellsClusters(obj, clusters = splitClusters, GDIThreshold = 1.43, # optimizeForSpeed = TRUE, cores = 6L, # saveObj = TRUE, outDir = outDir) # # # merges are: # # 1 <- '-1' + 1 + 4 # # 2 <- 2 + 3 + 8 # # 3 <- 5 # # 4 <- 6 # # 5 <- 7 # # obj <- addClusterization(obj, clName = "merge", # clusters = mergeClusters, coexDF = mergeCoexDF) ## ----eval=TRUE, include=TRUE-------------------------------------------------- data("vignette.merge.clusters", package = "COTAN") mergeClusters <- vignette.merge.clusters[getCells(obj)] mergeCoexDF <- DEAOnClusters(obj, clusters = mergeClusters) obj <- addClusterization(obj, clName = "merge", clusters = mergeClusters, coexDF = mergeCoexDF) ## ----------------------------------------------------------------------------- c(umapPlot, cellsPCA) %<-% cellsUMAPPlot(obj, clName = "merge", method = "LogNormalized", genesSel = "HGDI", colors = NULL, numNeighbors = 25, minPointsDist = 0.1) plot(umapPlot) ## ----eval=TRUE, include=TRUE-------------------------------------------------- c(.,scoreDF) %<-% clustersMarkersHeatmapPlot(obj, groupMarkers = layersGenes, clName = "split", kCuts = 5L) ## ----eval=TRUE, include=TRUE-------------------------------------------------- c(.,scoreDF) %<-% clustersMarkersHeatmapPlot(obj, groupMarkers = layersGenes, clName = "merge", kCuts = 5L) ## ----------------------------------------------------------------------------- if (file.exists(file.path(outDir, paste0(cond, ".cotan.RDS")))) { #Delete file if it exists file.remove(file.path(outDir, paste0(cond, ".cotan.RDS"))) } if (file.exists(file.path(outDir, paste0(cond, "_times.csv")))) { #Delete file if it exists file.remove(file.path(outDir, paste0(cond, "_times.csv"))) } if (dir.exists(file.path(outDir, cond))) { unlink(file.path(outDir, cond), recursive = TRUE) } if (dir.exists(file.path(outDir, GEO))) { unlink(file.path(outDir, GEO), recursive = TRUE) } # stop logging to file setLoggingFile("") file.remove(file.path(outDir, "vignette_v2.log")) options(parallelly.fork.enable = FALSE) sessionInfo()