--- title: "Multi-Tissue Analysis" author: "Michael T. Zimmermann" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Multi-Tissue Analysis} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} %\VignetteDepends{BgeeDB} --- In this vignette, we will analyze a gene expression dataset with samples from multiple tissues. We will: *download a public dataset *identify the genes expressed in two tissues *run enrichment analysis, cognizant of each tissues' expression profile *visualize network-based relationships between the tissues' expression profiles # Enrichment to Identify Tissue-Specific Patterns We will use data from BgeeDB normal-tissue expression. In research, we will typically want to compare normal to one or more treatment or disease groups. Thus, consider this as an illustrative example. ```{r bgee1, echo=TRUE, eval=FALSE, warning=FALSE, fig.width = 7, fig.height = 7, fig.align='center'} # Load RITAN library(RITANdata) library(RITAN) # Install the Bgee package. GO.db is a dependency of a dependency and may need to be installed seperately. for (pkg in c('GO.db','BgeeDB','biomaRt')){ if (! (pkg %in% rownames(installed.packages()) )){ if (!requireNamespace("BiocManager", quietly=TRUE)) install.packages("BiocManager") BiocManager::install(pkg) } library(pkg, character.only = TRUE) } for (pkg in c('tidyselect','venn','magrittr','ggplot2','igraph')){ if (! (pkg %in% rownames(installed.packages()) )){ install.packages(pkg) } library(pkg, character.only = TRUE) } # Setup Bgee query & get data (this may take some time) bgee <- Bgee$new(species = "Homo_sapiens", dataType = "rna_seq", release = "13.2") data <- getData(bgee) e <- formatData(bgee, data[[1]], callType = "present", stats = "rpkm") # Explore the dataset with: str(sampleNames(e)), str(featureNames(e)), str(phenoData(e)) table(phenoData(e)@data$Anatomical.entity.name) ## -------------------- - ## Get expression in two tissues tmp <- exprs(e)[ , phenoData(e)@data$Anatomical.entity.name == "heart" ] i <- apply( tmp, 1, function(x){ any(is.na(x)) }) expr_heart <- tmp[ !i, ] tmp <- exprs(e)[ , phenoData(e)@data$Anatomical.entity.name == "skeletal muscle tissue" ] i <- apply( tmp, 1, function(x){ any(is.na(x)) }) expr_skele <- tmp[ !i, ] venn::venn( list(Heart = rownames(expr_heart), Skeletal = rownames(expr_skele) ), cexil= 1, cexsn = 1, zcolor = "style" ) ## -------------------- - ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl", "http://Aug2017.archive.ensembl.org" ) # version 90 map_heart <- getBM( attributes = c('ensembl_gene_id','ensembl_transcript_id','hgnc_symbol'), filters = 'ensembl_gene_id', values = rownames(expr_heart), mart = ensembl ) map_skele <- getBM( attributes = c('ensembl_gene_id','ensembl_transcript_id','hgnc_symbol'), filters = 'ensembl_gene_id', values = rownames(expr_skele), mart = ensembl ) ## -------------------- - ## Functions associated with each tissue's top genes ## Important: the p-values reported here are observational, not inferential. mh <- apply( expr_heart, 1, mean ) top_heart <- map_heart$hgnc_symbol[ map_heart$ensembl_gene_id %in% rownames( expr_heart )[ mh > quantile(mh, .975) ] ] %>% setdiff(.,'') ms <- apply( expr_skele, 1, mean ) top_skele <- map_skele$hgnc_symbol[ map_skele$ensembl_gene_id %in% rownames( expr_skele )[ ms > quantile(ms, .975) ] ] %>% setdiff(.,'') e <- term_enrichment_by_subset( list( Heart = top_heart, Skeletal = top_skele ), resources = 'GO_slim_PIR', all_symbols = cached_coding_genes ) plot( e[ apply(e[, c(3:4)], 1, max) >= 12, ], cap=40, label_size_y = 8, wrap_y_labels = FALSE ) ## -------------------- - ## Network Interactions Within Each Tissue net_h <- network_overlap( top_heart, resources = c('CCSB','dPPI','HumanNet') ) net_s <- network_overlap( top_skele, resources = c('CCSB','dPPI','HumanNet') ) net2g <- function(x){ edges <- as.matrix( x[, c(1,3)] ) G <- igraph::make_undirected_graph( c(t(edges)) ) return(G) } g_h <- net2g( net_h ) g_s <- net2g( net_s ) g_dif <- igraph::difference( g_h, g_s ) g_int <- igraph::intersection( g_h, g_s ) cat(sprintf(' Of the top expressed genes, %d are shared and %d differ. ', length(V(g_int)), length(V(g_dif)) )) par(mar=rep(0,4)) plot(g_dif, vertex.size = 2, vertex.label = NA, vertex.frame.color = 'white', layout = layout_nicely ) ```