## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set(crop = NULL) ## ----load-example-data, message = FALSE--------------------------------------- library(Cepo) library(SingleCellExperiment) data("cellbench", package = "Cepo") cellbench cellbench = cellbench[!duplicated(rownames(cellbench)),] ## ----visualize colData-------------------------------------------------------- colData(cellbench)[1:5,] ## ----check_cell_types--------------------------------------------------------- unique(cellbench$celltype) ## ----differential-stability-analysis------------------------------------------ ds_res = Cepo(exprsMat = logcounts(cellbench), cellType = cellbench$celltype) ## ----------------------------------------------------------------------------- ds_res ## ----filtering-lowly-expressed-genes------------------------------------------ ds_res_zprop = Cepo::Cepo(exprsMat = logcounts(cellbench), cellTypes = cellbench$celltype, exprsPct = 0.5) ## ----filtering-logfc---------------------------------------------------------- ds_res_logfc = Cepo(exprsMat = logcounts(cellbench), cellTypes = cellbench$celltype, logfc = 1) ## ----------------------------------------------------------------------------- nrow(ds_res$stats) nrow(ds_res_zprop$stats) nrow(ds_res_logfc$stats) ## ----return-p-values1--------------------------------------------------------- ds_res_pvalues = Cepo(exprsMat = logcounts(cellbench), cellType = cellbench$celltype, computePvalue = 200, prefilter_pzero = 0.4) ## ----------------------------------------------------------------------------- idx = rownames(ds_res_pvalues$stats) par(mfrow=c(1,3)) for (i in unique(cellbench$celltype)) { plot(rank(ds_res_pvalues$stats[[i]]), rank(-log10(ds_res_pvalues$pvalues[idx, i])), main = i, xlab = "rank Cepo statistics", ylab = "rank -log10 p-values") } par(mfrow=c(1,1)) ## ----return-p-values2--------------------------------------------------------- ds_res_pvalues = Cepo(exprsMat = logcounts(cellbench), cellType = cellbench$celltype, # we use a low value for demonstration purposes computePvalue = 100, computeFastPvalue = FALSE) ds_res_pvalues ## ----upset-plot--------------------------------------------------------------- library(UpSetR) res_name = topGenes(object = ds_res, n = 500) upset(fromList(res_name), nsets = 3) ## ----plot-densities----------------------------------------------------------- plotDensities(x = cellbench, cepoOutput = ds_res, nGenes = 2, assay = "logcounts", celltypeColumn = "celltype") ## ----plot-densities-genes----------------------------------------------------- plotDensities(x = cellbench, cepoOutput = ds_res, genes = c("PLTP", "CPT1C", "MEG3", "SYCE1", "MICOS10P3", "HOXB7"), assay = "logcounts", celltypeColumn = "celltype") ## ----------------------------------------------------------------------------- data("sce_pancreas", package = "Cepo") sce_pancreas ## ----fig.height=5, fig.width=5------------------------------------------------ library(scater) sce = sce_pancreas sce = scater::logNormCounts(sce) sce = scater::runPCA(sce) scater::plotPCA( sce, colour_by = "cellTypes", shape_by = "batch") ## ----------------------------------------------------------------------------- library(scMerge) data("segList", package = "scMerge") head(segList$human$human_scSEG) corrected <- scMerge( sce_combine = sce, ctl = segList$human$human_scSEG, kmeansK = c(2, 2), assay_name = "scMerge", cell_type = sce$cellTypes) ## ----fig.height=5, fig.width=5------------------------------------------------ corrected = runPCA(corrected, exprs_values = "scMerge") scater::plotPCA( corrected, colour_by = "cellTypes", shape_by = "batch") ## ----------------------------------------------------------------------------- ds_res = Cepo::Cepo(exprsMat = assay(corrected, "scMerge"), cellType = corrected$cellTypes) ## ----------------------------------------------------------------------------- ds_res_batches = Cepo::Cepo(exprsMat = logcounts(sce), cellTypes = sce$cellTypes, block = sce$batch, minCelltype = 2) ## ----------------------------------------------------------------------------- names(ds_res_batches) ## ----fig.height=10, fig.width=10---------------------------------------------- idx = Reduce(intersect, lapply(ds_res_batches, function(x) names(x$stats[, 1]))) combinedRes = as.data.frame(do.call(cbind, lapply(ds_res_batches, function(x) x$stats[idx,] ))) batches = rep(names(ds_res_batches), sapply(ds_res_batches, function(x) length(x$stats))) cty = unlist(lapply(ds_res_batches, function(x) names(x$stats)), use.name = FALSE) colnames(combinedRes) = gsub("[.]", "_", colnames(combinedRes)) annot = data.frame( batch = batches, celltype = cty ) rownames(annot) = colnames(combinedRes) pheatmap::pheatmap(cor(combinedRes), annotation = annot) ## ----marker-genes, fig.height=10, fig.width=15-------------------------------- cepo_genes = Cepo::topGenes(ds_res_batches$average, n = 3) markersPlot = lapply(cepo_genes, function(x) { pp = lapply(x, function(gene) { p = scater::plotPCA( corrected, colour_by = gene, shape_by = "cellTypes") return(p) }) pp = patchwork::wrap_plots(pp, ncol = 3) + patchwork::plot_layout(guides = "auto") return(pp) }) patchwork::wrap_plots(markersPlot, nrow = 2) ## ----gsea, message = FALSE---------------------------------------------------- library(escape) library(fgsea) hallmarkList <- getGeneSets(species = "Homo sapiens", library = "H") fgseaRes <- fgsea(pathways = hallmarkList, stats = sort(ds_res_batches$average$stats[,"beta"]), minSize = 15, maxSize = 500) enriched_beta <- -log10(fgseaRes[order(pval), "padj"][[1]]) names(enriched_beta) <- fgseaRes[order(pval), "pathway"][[1]] ## ----------------------------------------------------------------------------- enriched_beta[1:5] ## ----fig.height=5, fig.width=5------------------------------------------------ plotEnrichment(hallmarkList[["HALLMARK-PANCREAS-BETA-CELLS"]], sort(ds_res_batches$average$stats[, "beta"])) + labs(title="HALLMARK-PANCREAS-BETA-CELLS") ## ----cepo-delayed------------------------------------------------------------- library(DelayedArray) library(HDF5Array) da_matrix = DelayedArray(realize(logcounts(cellbench), "HDF5Array")) class(da_matrix) class(seed(da_matrix)) da_output = Cepo(exprsMat = da_matrix, cellType = cellbench$celltype) ## ----cepo-parallel------------------------------------------------------------ library(BiocParallel) BPPARAM = if (.Platform$OS.type == "windows") { BiocParallel::SnowParam(workers = 2) } else { BiocParallel::MulticoreParam(workers = 2) } DelayedArray::setAutoBPPARAM(BPPARAM = BPPARAM) ## Setting two cores for computation da_output_parallel = Cepo(exprsMat = da_matrix, cellTypes = cellbench$celltype) DelayedArray::setAutoBPPARAM(BPPARAM = SerialParam()) ## Revert back to only one core ## ----sessionInfo-------------------------------------------------------------- sessionInfo()