--- title: "Visualizing single cell data" author: - name: Guangchuang Yu and Shuangbin Xu email: guangchuangyu@gmail.com affiliation: Department of Bioinformatics, School of Basic Medical Sciences, Southern Medical University date: "`r Sys.Date()`" output: prettydoc::html_pretty: theme: cayman highlight: github pdf_document: toc: true vignette: > %\VignetteIndexEntry{Visualizing single cell data} %\VignetteEngine{knitr::rmarkdown} %\usepackage[utf8]{inputenc} %\VignetteEncoding{UTF-8} --- ```{r style, echo=FALSE, results="asis", message=FALSE} knitr::opts_chunk$set(tidy = FALSE, warning = FALSE, message = FALSE, fig.width = 9, fig.height = 6) ``` # 1. Introduction Single-cell RNA sequencing (scRNA-seq) and Spatial RNA sequencing are widely used techniques for profiling gene expression in individual cells with their locations in the histological sections. These allow molecular biology to be studied at a resolution that cannot be matched by bulk sequencing of cell populations. To better visualize the result of reduction, spatial gene expression pattern in single cell or spatial experiment data, `ggsc` provides some layer functions based on the `ggplot2` grammar. It can work with the `SingleCellExperiment` class or `Seurat` class, which are the widely used classes for storing data from single cell experiment. # 2. Installation To install `ggsc` package, please enter the following codes in R: ```{r, eval=FALSE} # Release if (!requireNamespace('BiocManager', quietly = TRUE)) install.package("BiocManager") BiocManager::install("ggsc") # Or for devel if(!requireNamespace("remotes", quietly=TRUE)){ install.packages("remotes") } remotes::install_github("YuLab-SMU/ggsc") ``` # 3. The data pre-processing Here we use an example data from a single sample (sample 151673) of human brain dorsolateral prefrontal cortex (DLPFC) in the human brain, measured using the 10x Genomics Visium platform. First, a brief/standard data pre-processing were done with the `scater` and `scran` packages. ```{r, message=FALSE, setup.preprocess} library(BiocParallel) library(STexampleData) library(scater) library(scran) library(ggplot2) # create ExperimentHub instance eh <- ExperimentHub() # query STexampleData datasets myfiles <- query(eh, "STexampleData") spe <- myfiles[["EH7538"]] spe <- addPerCellQC(spe, subsets=list(Mito=grep("^MT-", rowData(spe)$gene_name))) colData(spe) |> head() colData(spe) |> data.frame() |> ggplot(aes(x = sum, y = detected, colour = as.factor(in_tissue))) + geom_point() plotColData(spe, x='sum', y = 'subsets_Mito_percent', other_fields="in_tissue") + facet_wrap(~in_tissue) ``` Firstly, we filter the data to retain the cells that are in the tissue. Then cell-specific biases are normalized using the `computeSumFactors` method. ```{r} spe <- spe[, spe$in_tissue == 1] clusters <- quickCluster( spe, BPPARAM = BiocParallel::MulticoreParam(workers=2), block.BPPARAM = BiocParallel::MulticoreParam(workers=2) ) spe <- computeSumFactors(spe, clusters = clusters, BPPARAM = BiocParallel::MulticoreParam(workers=2)) spe <- logNormCounts(spe) ``` Next, we use the Graph-based clustering method to do the reduction with the `runPCA` and `runTSNE` functions provided in the `scater` package. ```{r} # identify genes that drive biological heterogeneity in the data set by # modelling the per-gene variance dec <- modelGeneVar(spe) # Get the top 15% genes. top.hvgs <- getTopHVGs(dec, prop=0.15) spe <- runPCA(spe, subset_row=top.hvgs) output <- getClusteredPCs(reducedDim(spe), BPPARAM = BiocParallel::MulticoreParam(workers=2)) npcs <- metadata(output)$chosen npcs reducedDim(spe, "PCAsub") <- reducedDim(spe, "PCA")[,1:npcs,drop=FALSE] g <- buildSNNGraph(spe, use.dimred="PCAsub", BPPARAM = MulticoreParam(workers=2)) cluster <- igraph::cluster_walktrap(g)$membership colLabels(spe) <- factor(cluster) set.seed(123) spe <- runTSNE(spe, dimred="PCAsub", BPPARAM = MulticoreParam(workers=2)) ``` # Dimensional reduction plot Here, we used the `sc_dim` function provided in the `ggsc` package to visualize the `TSNE` reduction result. Unlike other packages, `ggsc` implemented the `ggplot2` graphic of grammar syntax and visual elements are overlaid through the combinations of graphic layers. The `sc_dim_geom_label` layer is designed to add cell cluster labels to a dimensional reduction plot, and can utilized different implementation of text geoms, such as `geom_shadowtext` in the `shadowtext` package and `geom_text` in the `ggplot2` package (default) through the `geom` argument. ```{r setup} library(ggsc) library(ggplot2) sc_dim(spe, reduction = 'TSNE') + sc_dim_geom_label() sc_dim(spe, reduction = 'TSNE') + sc_dim_geom_label( geom = shadowtext::geom_shadowtext, color='black', bg.color='white' ) ``` # Visualize ‘features’ on a dimensional reduction plot To visualize the gene expression of cells in the result of reduction, `ggsc` provides `sc_feature` function to highlight on a dimensional reduction plot. ```{r} genes <- c('MOBP', 'PCP4', 'SNAP25', 'HBB', 'IGKC', 'NPY') target.features <- rownames(spe)[match(genes, rowData(spe)$gene_name)] sc_feature(spe, target.features[1], slot='logcounts', reduction = 'TSNE') sc_feature(spe, target.features, slot='logcounts', reduction = 'TSNE') ``` In addition, it provides `sc_dim_geom_feature` layer working with `sc_dim` function to visualize the cells expressed the gene and the cell clusters information simultaneously. ```{r} sc_dim(spe, slot='logcounts', reduction = 'TSNE') + sc_dim_geom_feature(spe, target.features[1], color='black') sc_dim(spe, alpha=.3, slot='logcounts', reduction = 'TSNE') + ggnewscale::new_scale_color() + sc_dim_geom_feature(spe, target.features, mapping=aes(color=features)) + scale_color_viridis_d() ``` It also provides `sc_dim_geom_ellipse` to add confidence levels of the the cluster result, and `sc_dim_geom_sub` to select and highlight a specific cluster of cells. ```{r} sc_dim(spe, reduction = 'TSNE') + sc_dim_geom_ellipse(level=0.95) selected.cluster <- c(1, 6, 8) sc_dim(spe, reduction = 'TSNE') + sc_dim_sub(subset=selected.cluster, .column = 'label') sc_dim(spe, color='grey', reduction = 'TSNE') + sc_dim_geom_sub(subset=selected.cluster, .column = 'label') + sc_dim_geom_label(geom = shadowtext::geom_shadowtext, mapping = aes(subset = label %in% selected.cluster), color='black', bg.color='white') ``` # Violin plot of gene expression `ggsc` provides `sc_violin` to visualize the expression information of specific genes using the violin layer with common legend, the genes can be compared more intuitively. ```{r} sc_violin(spe, target.features[1], slot = 'logcounts') sc_violin(spe, target.features[1], slot = 'logcounts', .fun=function(d) dplyr::filter(d, value > 0) ) + ggforce::geom_sina(size=.1) sc_violin(spe, target.features, slot = 'logcounts') + theme(axis.text.x = element_text(angle=45, hjust=1)) ``` # Spatial features To visualize the spatial pattern of gene, `ggsc` provides `sc_spatial` to visualize specific features/genes with image information. ```{r, fig.width = 14, fig.height = 10} library(aplot) f <- sc_spatial(spe, features = target.features, slot = 'logcounts', ncol = 3, image.mirror.axis = NULL, image.rotate.degree = -90 ) f pp <- lapply(target.features, function(i) { sc_spatial(spe, features = i, slot = 'logcounts', image.rotate.degree = -90, image.mirror.axis = NULL) }) aplot::plot_list(gglist = pp) ``` # Session information Here is the output of sessionInfo() on the system on which this document was compiled: ```{r, echo=FALSE} sessionInfo() ```