--- title: "Import and representation of MERFISH mouse colon IBD data" author: - name: Daniela Corbetta affiliation: Department of Statistical Sciences, University of Padova - name: Ludwig Geistlinger affiliation: Center for Computational Biomedicine, Harvard Medical School - name: Tyrone Lee affiliation: Center for Computational Biomedicine, Harvard Medical School - name: Jeffrey Moffitt affiliation: Department of Microbiology, Harvard Medical School - name: Robert Gentleman affiliation: Center for Computational Biomedicine, Harvard Medical School output: BiocStyle::html_document: self_contained: yes toc: true toc_float: true toc_depth: 2 code_folding: show date: "`r doc_date()`" vignette: > % \VignetteIndexEntry{Mouse colon ibd} % \VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", crop = NULL ) ``` # Setup After the installation, we proceed by loading the package and additional packages used in the vignette. ```{r lib, message = FALSE} library(MerfishData) library(ExperimentHub) library(ggplot2) library(scater) library(terra) ``` # Data Gut inflammation involves contributions from immune and non-immune cells, whose interactions are shaped by the spatial organization of the healthy gut and its remodeling during inflammation. The crosstalk between fibroblasts and immune cells is an important axis in this process, but our understanding has been challenged by incomplete cell-type definition and biogeography. To address this challenge, [Cadinu et al., 2024](https://doi.org/10.1016/j.cell.2024.03.013) used multiplexed error-robust fluorescence in situ hybridization (MERFISH) to profile the expression of 943 genes in 1.35 million cells imaged across the onset and recovery from a mouse colitis model. To assign RNAs to individual cells, they used [Baysor](https://github.com/kharchenkolab/Baysor). They identified diverse cell populations, charted their spatial organization, and revealed their polarization or recruitment in inflammation. This vignette demonstrates how to obtain the MERFISH mouse IBD colon dataset from [Cadinu et al., 2024](https://doi.org/10.1016/j.cell.2024.03.013) from Bioconductor's [ExperimentHub](https://bioconductor.org/packages/ExperimentHub). The data was obtained from the [datadryad data publication](https://doi.org/10.5061/dryad.rjdfn2zh3). ```{r eh} eh <- ExperimentHub() AnnotationHub::query(eh, c("MerfishData", "Cadinu2024")) ``` ## Segmented data It is also possible to obtain the data in a [SpatialExperiment](https://bioconductor.org/packages/SpatialExperiment), which integrates experimental data and cell metadata, and provides designated accessors for the spatial coordinates. ```{r data, message = FALSE} spe <- MouseColonIbdCadinu2024() spe ``` Inspect the data components: ```{r datacomp} counts(spe)[1:5,1:5] logcounts(spe)[1:5,1:5] colData(spe) head(spatialCoords(spe)) ``` The dataset includes cell type labels with three levels of granularity (variables `tier1`, `tier2`, `tier3` in the `colData`). ```{r tiers} table(spe$tier1) table(spe$tier2) table(spe$tier3) ``` The data have been collected before the insurgence of colitis (`sample_type="Healthy"`) and after some time intervals (3 days, 9 days, 21 days). ```{r stype} table(spe$sample_type) ``` # Visualization ## Reduced dimensions Replicate Fig. 1C of the [paper](https://doi.org/10.1016/j.cell.2024.03.013), to visualize cell types in the UMAP space: ```{r fig1c, message=FALSE} plotReducedDim(spe, "UMAP", colour_by = "tier1", scattermore = TRUE, rasterise = TRUE) + scale_color_manual(values = metadata(spe)$colors_tier1) + labs(color = "cell type") ``` ## Spatial organization We can also replicate Fig 1.D, to see the spatial distribution of cell types in one slide: ```{r fig1d} # Filter spatial coordinates for the selected slice ID slice_coords <- spatialCoords(spe)[spe$mouse_id == "082421_D0_m6" & spe$sample_type == "Healthy" & spe$technical_repeat_number == "1" & spe$slice_id == "2", ] # Rotate coordinates to have them match the rotation in the paper slice_coords_vec <- vect(slice_coords, type = "points") slice_coords_r <- spin(slice_coords_vec, 180) slice_c <- as.data.frame(slice_coords_r, geom = "XY") slice_df <- data.frame(x = slice_c[,1], y = slice_c[,2], tier1 = spe$tier1[spe$mouse_id == "082421_D0_m6" & spe$sample_type == "Healthy" & spe$technical_repeat_number == "1" & spe$slice_id == "2"]) # Plot ggplot(data = slice_df, aes(x = x, y = y, color = tier1)) + geom_point(shape = 19, size = 0.5) + scale_color_manual(values = metadata(spe)$colors_tier1) + guides(colour = guide_legend(override.aes = list(size = 2))) + labs(color = "cell type") + theme_bw(10) ``` We can compare the spatial organization of cells before the insurgence of colitis and at the considered timepoints (after 3, 9, and 21 days): ```{r, fig.height = 8, fig.width = 10} # Filter spatial coordinates for the selected slice ID slice_coords <- spatialCoords(spe)[(spe$mouse_id == "062921_D0_m3a" & spe$slice_id=="2") | (spe$mouse_id == "092421_D3_m1" & spe$slice_id=="2")| (spe$mouse_id == "062221_D9_m3" & spe$slice_id=="2") | (spe$mouse_id == "082421_D21_m1" & spe$slice_id=="2"), ] slice_df <- data.frame(x = scale(slice_coords[, 1], scale = FALSE), y = scale(slice_coords[, 2], scale = FALSE), tier1 = spe$tier1[(spe$mouse_id == "062921_D0_m3a" & spe$slice_id=="2") | (spe$mouse_id == "092421_D3_m1" & spe$slice_id=="2")| (spe$mouse_id == "062221_D9_m3" & spe$slice_id=="2") | (spe$mouse_id == "082421_D21_m1" & spe$slice_id=="2")], day = spe$sample_type[(spe$mouse_id == "062921_D0_m3a" & spe$slice_id=="2") | (spe$mouse_id == "092421_D3_m1" & spe$slice_id=="2")| (spe$mouse_id == "062221_D9_m3" & spe$slice_id=="2") | (spe$mouse_id == "082421_D21_m1" & spe$slice_id=="2")]) slice_df$day <- factor(slice_df$day, levels = c("Healthy", "DSS3", "DSS9", "DSS21")) ggplot(data = slice_df, aes(x = x, y = y, color = tier1)) + geom_point(shape = 19, size = 0.5) + scale_color_manual(values = metadata(spe)$colors_tier1) + theme_bw(10) + guides(colour = guide_legend(override.aes = list(size=2)))+ labs(color = "cell type") + facet_wrap( ~ day, ncol = 2, nrow = 2, scales = "free") ``` We can also restrict the analysis to a specific macro-group to see the different distribution of Tier2 cell types. See an example the different composition of epithelial cells: ```{r, fig.height = 8, fig.width = 10} slice_df$tier2 <- spe$tier2[(spe$mouse_id == "062921_D0_m3a" & spe$slice_id=="2") | (spe$mouse_id == "092421_D3_m1" & spe$slice_id=="2")| (spe$mouse_id == "062221_D9_m3" & spe$slice_id=="2") | (spe$mouse_id == "082421_D21_m1" & spe$slice_id=="2")] slice_df$color <- ifelse(slice_df$tier1 == "Epithelial", as.character(slice_df$tier2), "grey") slice_df$color <- factor(slice_df$color) colored_df <- slice_df[slice_df$tier1 == "Epithelial", ] ggplot() + geom_point(data = slice_df, aes(x = x, y = y), color = "grey", shape = 19, size = 0.1) + geom_point(data = colored_df, aes(x = x, y = y, color = tier2), shape = 19, size = 0.1) + scale_color_manual(values = metadata(spe)$colors_tier2) + theme_bw(10) + guides(colour = guide_legend(override.aes = list(size = 2))) + labs(color = 'cell type') + facet_wrap(~ day, ncol = 2, scales = "free") ``` ```{r, fig.height = 8, fig.width = 10} slice_df$color <- ifelse(slice_df$tier1 == "Immune", as.character(slice_df$tier2), "grey") slice_df$color <- factor(slice_df$color) colored_df <- subset(slice_df, tier1 == "Immune") p <- ggplot() + geom_point(data = slice_df, aes(x = x, y = y), color = "grey", shape = 19, size = 0.1) + geom_point(data = colored_df, aes(x = x, y = y, color = tier2), shape = 19, size = 0.1) + scale_color_manual(values = metadata(spe)$colors_tier2) + theme_bw(10) + guides(colour = guide_legend(override.aes = list(size = 2))) + labs(color = 'cell type') + facet_wrap(~ day, ncol = 2, scales = "free") p ``` # Interactive exploration The MERFISH mouse colon IBD dataset is part of the [gallery of publicly available MERFISH datasets](https://moffittlab.connect.hms.harvard.edu/merfish/merfish_homepage.html). This gallery consists of dedicated [iSEE](https://bioconductor.org/packages/iSEE) and [Vitessce](http://vitessce.io/) instances, published on [Posit Connect](https://posit.co/products/enterprise/connect/), that enable the interactive exploration of different segmentations, the expression of marker genes, and overlay of cell metadata on a spatial grid or a microscopy image. # SessionInfo ```{r sessionInfo} sessionInfo() ```