--- title: "Manuscript code - transcriptional signature identification" author: "Stefano Mangiola" date: "`r Sys.Date()`" package: tidybulk output: BiocStyle::html_document: toc_float: true vignette: > %\VignetteEngine{knitr::knitr} %\VignetteIndexEntry{Manuscript code - transcriptional signature identification} %\usepackage[UTF-8]{inputenc} --- This document includes the code used for the manuscript, for the transcriptional signature identification. ```{r, echo=FALSE, include=FALSE} library(knitr) #library(kableExtra) knitr::opts_chunk$set(cache = TRUE, warning = FALSE, message = FALSE, cache.lazy = FALSE) #options(width = 120) options(pillar.min_title_chars = Inf) library(magrittr) library(tibble) library(dplyr) library(magrittr) library(tidyr) library(ggplot2) library(rlang) library(purrr) library(tidybulk) my_theme = theme_bw() + theme( panel.border = element_blank(), axis.line = element_line(), panel.grid.major = element_line(size = 0.2), panel.grid.minor = element_line(size = 0.1), text = element_text(size=12), legend.position="bottom", aspect.ratio=1, strip.background = element_blank(), axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)), axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)) ) ``` ```{r, eval=FALSE,echo=FALSE, include=FALSE} load("../dev/counts_cell_type.rda") options(tidybulk_do_validate = FALSE) ``` ```{r, eval = FALSE} counts_scaled = counts_cell_type %>% # Convert to tidybulk tibble tidybulk(sample, symbol, count) %>% # Preprocess and scale the data aggregate_duplicates() %>% identify_abundant() %>% scale_abundance() %>% # Impute missing sample-transcript pairs impute_missing_abundance(~cell_type) %>% mutate(.abundant = TRUE) ``` ```{r, eval = FALSE} counts_non_red = counts_scaled %>% # Perform operation for each cell type nest(data = -cell_type) %>% mutate(data = map( data, ~ .x %>% remove_redundancy( method="correlation", correlation_threshold = 0.99, top=1000 ) )) %>% unnest(data) ``` ```{r, eval = FALSE} # Select genes that are in at least one sample for all cell types gene_all = counts_non_red %>% distinct(symbol, cell_type) %>% count(symbol) %>% filter(n == max(n)) # filter dataset and impute missing transcripts-samples pairs counts_non_red_common = counts_non_red %>% inner_join(gene_all) ``` ```{r, eval = FALSE} counts_non_red_common %>% reduce_dimensions(method = "tSNE", action="get") %>% ggplot(aes(x = `tSNE1`, y = `tSNE2`, color = cell_type)) + geom_point(size =2) ``` ```{r, echo=F} # saveRDS(counts_non_red_common, "dev/counts_non_red_common.rds", compress = "xz") tidybulk::vignette_manuscript_signature_tsne %>% ggplot(aes(x = `tSNE1`, y = `tSNE2`, color = cell_type)) + geom_point(size =2) ``` ```{r, eval = FALSE} markers = # Define all-versus-all cell type permutations counts_non_red_common %>% distinct(cell_type) %>% pull(cell_type) %>% gtools::permutations(n = length(.), r = 2, v = .) %>% as_tibble() %>% setNames(c("cell_type1", "cell_type2")) %>% mutate(contrast = sprintf("cell_type%s - cell_type%s", cell_type1, cell_type2)) %>% # Rank marker genes mutate(de = pmap( list(cell_type1, cell_type2, contrast), ~ counts_non_red_common %>% filter(cell_type %in% c(..1, ..2)) %>% test_differential_abundance(~ 0 + cell_type, .contrasts = ..3, fill_missing_values = TRUE, action="get", omit_contrast_in_colnames = T) %>% filter(logFC > 0) %>% arrange(FDR) %>% rowid_to_column(var = "i") )) %>% unnest(de) ``` ```{r, eval = FALSE} markers %>% # Filter best markers for monocytes filter(cell_type1=="monocyte" & i==1) %>% # Prettify contrasts for plotting unite(pair, c("cell_type1", "cell_type2"), remove = FALSE, sep = "\n") %>% # Reshape gather(which, cell_type, cell_type1, cell_type2) %>% distinct(pair, symbol, which, cell_type) %>% # Attach counts left_join(counts_non_red) %>% # Plot ggplot(aes(y = count_scaled + 1, x = cell_type, fill = cell_type)) + geom_boxplot() + facet_wrap(~pair+ symbol, scales ="free_x", nrow = 2) + scale_y_log10() ``` ```{r, echo=F} # saveRDS(markers, "dev/vignette_markers.rds", compress = "xz") tidybulk::vignette_manuscript_signature_boxplot %>% # Plot ggplot(aes(y = count_scaled + 1, x = cell_type, fill = cell_type)) + geom_boxplot() + facet_wrap(~pair+ symbol, scales ="free_x", nrow = 2) + scale_y_log10() ``` ```{r, eval = FALSE} markers %>% # Select first 5 markers from each cell-type pair filter(i <= 5) %>% unite(pair, c("cell_type1", "cell_type2"), remove = FALSE, sep = "\n") %>% # Reshape gather(which, cell_type, cell_type1, cell_type2) %>% distinct(symbol) %>% # Attach counts left_join(counts_non_red, by = c("symbol")) %>% # Plot reduce_dimensions(sample, symbol, count_scaled, method = "tSNE", action="get") %>% pivot_sample(sample) %>% ggplot(aes(x = `tSNE1`, y = `tSNE2`, color = cell_type)) + geom_point(size =2) ``` ```{r, echo=F} tidybulk::vignette_manuscript_signature_tsne2 %>% pivot_sample(sample) %>% ggplot(aes(x = `tSNE1`, y = `tSNE2`, color = cell_type)) + geom_point(size =2) ```