## ----knitr, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", cache = FALSE ) ## ----setup, eval = FALSE------------------------------------------------------ # # install from bioconductor # if (!require(BiocManager)) { # install.packages('BiocManager') # BiocManager::install('leapR') # } # ## ----libraries, message=FALSE, warning = FALSE-------------------------------- # load the core libraries library(leapR) library(gplots) library(rmarkdown) # plotting helpers used in this vignette library(ggplot2) library(dplyr) library(tibble) library(stringr) ## ----pathdb------------------------------------------------------------------- data(ncipid) ## ----omicsdata, message=FALSE, warning=FALSE, echo = FALSE-------------------- url <- "https://api.figshare.com/v2/file/download/56536217" pdata <- download.file(url, method = "libcurl", destfile = "protData.rda") # as.matrix() load("protData.rda") p <- file.remove("protData.rda") url <- "https://api.figshare.com/v2/file/download/56536214" tdata <- download.file(url, method = "libcurl", destfile = "transData.rda") load("transData.rda") p <- file.remove("transData.rda") url <- "https://api.figshare.com/v2/file/download/56536211" phdata <- download.file(url, method = "libcurl", destfile = "phosData.rda") load("phosData.rda") p <- file.remove("phosData.rda") ## ----show_pset_example-------------------------------------------------------- # Inspect the `pset` SummarizedExperiment. str(pset) dim(SummarizedExperiment::assay(pset, "proteomics")) head(rownames(pset)) head(colnames(pset)) ## ----ptlists------------------------------------------------------------------ data(shortlist) data(longlist) ## columns that we want to use for results cols_to_display <- c("ingroup_n", "outgroup_n", "background_n", "pvalue", "BH_pvalue") ## ----abundance, echo=TRUE, warning=FALSE, message=FALSE----------------------- # in this example we lump a bunch of patients together (the 'short survivors') # and compare them to another group (the 'long survivors') ### using enrichment_wrapper function protdata.enrichment.svl <- leapR::leapR( geneset = ncipid, enrichment_method = "enrichment_comparison", eset = pset, assay_name = "proteomics", primary_columns = shortlist, secondary_columns = longlist ) or <- order(unlist(protdata.enrichment.svl[, "pvalue"])) rmarkdown::paged_table(protdata.enrichment.svl[or, cols_to_display]) # another application is to compare just one patient against another # (this would be the equivalent of comparing one time point to another) ### using enrichment_wrapper function protdata.enrichment.svl.ovo <- leapR::leapR( geneset = ncipid, enrichment_method = "enrichment_comparison", eset = pset, assay_name = "proteomics", primary_columns = shortlist[1], secondary_columns = longlist[1] ) or <- order(unlist(protdata.enrichment.svl.ovo[, "pvalue"])) rmarkdown::paged_table(protdata.enrichment.svl.ovo[or, cols_to_display]) ## ----fishers, echo=TRUE, warning=FALSE, message=FALSE------------------------- # for this example we will construct a list of genes from the expression data # to emulate what you might be inputting genelist <- rownames(pset)[which(SummarizedExperiment::assay(pset, "proteomics")[, 1] > 0.5)] protdata.enrichment.sets.test <- leapR::leapR( geneset = ncipid, enrichment_method = "enrichment_in_sets", eset = pset, assay_name = "proteomics", targets = genelist ) or <- order(protdata.enrichment.sets.test[, "pvalue"]) rmarkdown::paged_table(protdata.enrichment.sets.test[or, cols_to_display]) # in this example we construct some modules from the hierarchical clustering # of the data protdata_naf <- SummarizedExperiment::assay(pset, "proteomics") # hierarchical clustering is not too happy with lots of missing values # so we'll do a zero fill on this to get the modules protdata_naf[which(is.na(protdata_naf))] <- 0 # construct the hierarchical clustering using the 'wardD' method, which # seems to give more even sized modules protdata_hc <- hclust(dist(protdata_naf), method = "ward.D2") # arbitrarily we'll chop the clusters into 5 modules modules <- cutree(protdata_hc, k = 5) ## sara: created list clusters <- lapply(unique(modules), function(x) names(which(modules == x))) # modules is a named list of values where each value is a module # number and the name is the gene name # To do enrichment for one module (module 1 in this case) do this protdata.enrichment.sets.module_1 <- leapR::leapR( geneset = ncipid, enrichment_method = "enrichment_in_sets", eset = pset, assay_name = "proteomics", targets = names(modules[which(modules == 1)]) ) # To do enrichment on all modules and return the list of enrichment results protdata.enrichment.sets.modules <- do.call(rbind, leapR::cluster_enrichment( eset = pset, assay_name= 'proteomics', geneset = ncipid, clusters = clusters, sigfilter = 0.25)) ## nothing is enriched rmarkdown::paged_table(protdata.enrichment.sets.modules[, cols_to_display]) ## ----fishers_plot, echo=TRUE, warning=FALSE, message=FALSE-------------------- # Plot the top enriched gene sets from Fisher's exact test # Stars indicate significance (None seen here) plot_leapr_bar( protdata.enrichment.sets.test, title = "Fisher's Exact Test: Top Enriched Pathways", top_n = 10, star_thresholds = c(0.05, 0.01, 0.001), wrap = 40 ) ## ----ks, echo=TRUE, warning = FALSE, message=FALSE---------------------------- # This is how you calculate enrichment in a ranked list # (for example from topology) ### using enrichment_wrapper function protdata.enrichment.order <- leapR::leapR( geneset = ncipid, "enrichment_in_order", eset = pset, method = 'ks', assay_name = "proteomics", primary_columns = shortlist[1] ) or <- order(protdata.enrichment.order[, "pvalue"]) rmarkdown::paged_table(protdata.enrichment.order[or, cols_to_display]) ## ----ks_plot, echo=TRUE, warning = FALSE, message=FALSE----------------------- # Plot the ranked enrichment results plot_leapr_bar( protdata.enrichment.order, title = "Kolmogorov-Smirnov Test: Ranked Enrichment", top_n = 12, fill_sig = "#2166AC", fill_ns = "#B2DFDB", wrap = 38 ) ## ----zt, echo=TRUE, warning = FALSE, message=FALSE---------------------------- # This is how you calculate enrichment in a ranked list # (for example from topology) ### using enrichment_wrapper function protdata.enrichment.order <- leapR::leapR( geneset = ncipid, "enrichment_in_order", eset = pset, method = 'ztest', assay_name = "proteomics", primary_columns = shortlist[1] ) or <- order(protdata.enrichment.order[, "pvalue"]) rmarkdown::paged_table(protdata.enrichment.order[or, cols_to_display]) plot_leapr_bar( protdata.enrichment.order, title = "Z Test: Ranked Enrichment", top_n = 12, fill_sig = "#2166AC", fill_ns = "#B2DFDB", wrap = 38 ) ## ----correlation, echo=TRUE, warning=FALSE, message=FALSE--------------------- ### using enrichment_wrapper function protdata.enrichment.correlation <- leapR::leapR( geneset = ncipid, enrichment_method = "correlation_enrichment", assay_name = "proteomics", eset = pset ) or <- order(protdata.enrichment.correlation[, "pvalue"]) rmarkdown::paged_table(head(protdata.enrichment.correlation[or, cols_to_display])) protdata.enrichment.correlation.short <- leapR::leapR( geneset = ncipid, enrichment_method = "correlation_enrichment", assay_name = "proteomics", eset = pset[, shortlist] ) or <- order(protdata.enrichment.correlation.short[, "pvalue"]) rmarkdown::paged_table(head(protdata.enrichment.correlation.short[or, cols_to_display])) protdata.enrichment.correlation.long <- leapR::leapR( geneset = ncipid, enrichment_method = "correlation_enrichment", assay_name = "proteomics", eset = pset[, longlist] ) or <- order(protdata.enrichment.correlation.long[, "pvalue"]) rmarkdown::paged_table(head(protdata.enrichment.correlation.long[or, cols_to_display])) ## ----corr_plot, echo=TRUE, warning=FALSE, message=FALSE----------------------- # Compare correlation patterns across conditions plot_leapr_bar( protdata.enrichment.correlation.short, title = "Correlation Enrichment: Short Survivors", top_n = 10, fill_sig = "#1B9E77", fill_ns = "#D8F0E8", wrap = 36 ) ## ----phosphoproteomics, echo=TRUE, warning=FALSE, message=FALSE--------------- data("kinasesubstrates") # for an individual tumor calculate the Kinase-Substrate # Enrichment (similar to KSEA) # This uses the site-specific phosphorylation data to determine # which kinases # might be active by assessing the enrichment of the # phosphorylation of their known substrates phosphodata.ksea.order <- leapR::leapR( geneset = kinasesubstrates, enrichment_method = "enrichment_in_order", assay_name = "phosphoproteomics", eset = phset, method = 'ztest', primary_columns = "TCGA-13-1484") or <- order(phosphodata.ksea.order[, "pvalue"]) rmarkdown::paged_table(phosphodata.ksea.order[or, cols_to_display]) # now do the same thing but use a threshold phosphodata.sets.order <- leapR::leapR( geneset = kinasesubstrates, enrichment_method = "enrichment_in_sets", eset = phset, assay_name = "phosphoproteomics", threshold = 0.5, primary_columns = "TCGA-13-1484" ) or <- order(phosphodata.sets.order[, "pvalue"]) rmarkdown::paged_table(phosphodata.sets.order[or, cols_to_display]) ## ----phos_plot, echo=TRUE, warning=FALSE, message=FALSE----------------------- plot <- plot_leapr_bar( phosphodata.sets.order, title = "Kinase Substrate Enrichment (Phosphoproteomics)", top_n = 15, star_thresholds = c(0.05, 0.01, 1e-3), wrap = 36, fill_sig = "#0F766E", # dark teal for significant fill_ns = "#99F6E4", # light teal for non-significant outline = "grey30", axis_text_y_size = 7, axis_text_x_size = 8 ) plot # You can also modify the plot further using standard ggplot2 arguments plot + ggplot2::labs( y = expression(-log[10]("adjusted p-value")), caption = "* p<0.05, ** p<0.01, *** p<0.001" ) ## ----session------------------------------------------------------------------ sessionInfo()