## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ----installation-github, eval = FALSE---------------------------------------- # # devel version # # # install.packages("devtools") # devtools::install_github("Muunraker/nipalsMCIA", ref = "devel", # force = TRUE, build_vignettes = TRUE) # devel version ## ----installation-bioconductor, eval = FALSE---------------------------------- # # release version # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("nipalsMCIA") ## ----load-packages, message = FALSE------------------------------------------- library(ComplexHeatmap) library(dplyr) library(fgsea) library(ggplot2) library(ggpubr) library(nipalsMCIA) library(stringr) # NIPALS starts with a random vector set.seed(42) ## ----intro-load-data---------------------------------------------------------- # load the dataset which uses the name data_blocks data(NCI60) # examine the contents data_blocks$miRNA[1:3, 1:3] data_blocks$mrna[1:3, 1:3] data_blocks$prot[1:3, 1:3] ## ----convert-to-mae----------------------------------------------------------- data_blocks_mae <- simple_mae(data_blocks, row_format = "sample") ## ----intro-mcia, warning = FALSE, message = FALSE----------------------------- set.seed(42) mcia_results <- nipals_multiblock(data_blocks_mae, col_preproc_method = "colprofile", num_PCs = 10, tol = 1e-12, plots = "none") ## ----intro-mcia-results------------------------------------------------------- slotNames(mcia_results) ## ----part-1-mcia, warning = FALSE, message = FALSE, fig.dim = c(9, 5)--------- set.seed(42) mcia_results <- nipals_multiblock(data_blocks_mae, col_preproc_method = "colprofile", num_PCs = 10, tol = 1e-12) ## ----part-1-visualize-clusters, fig.dim = c(5, 5)----------------------------- projection_plot(mcia_results, projection = "global", orders = c(1, 2)) ## ----part-1-metadata---------------------------------------------------------- # preview of metadata head(metadata_NCI60) # loading of mae with metadata data_blocks_mae <- simple_mae(data_blocks, row_format = "sample", colData = metadata_NCI60) ## ----part-1-mcia-again, warning = FALSE, message = FALSE---------------------- # adding metadata as part of the nipals_multiblock() function set.seed(42) mcia_results <- nipals_multiblock(data_blocks_mae, col_preproc_method = "colprofile", plots = "none", num_PCs = 10, tol = 1e-12) ## ----part-1-visualize-clusters-color-col, fig.dim = c(5, 5)------------------- # meta_colors = c("blue", "grey", "yellow") can use color names # meta_colors = c("#00204DFF", "#7C7B78FF", "#FFEA46FF") can use hex codes meta_colors <- get_metadata_colors(mcia_results, color_col = 1, color_pal_params = list(option = "E")) projection_plot(mcia_results, projection = "global", orders = c(1, 2), color_col = "cancerType", color_pal = meta_colors, legend_loc = "bottomleft") ## ----part-1-global-heatmap-colored, fig.dim = c(6, 4)------------------------- global_scores_heatmap(mcia_results = mcia_results, color_col = "cancerType", color_pal = meta_colors) ## ----part-2-block-heatmap, fig.dim = c(4.5, 3)-------------------------------- block_weights_heatmap(mcia_results) ## ----part-2-loadings, fig.dim = c(6, 4)--------------------------------------- # colors_omics = c("red", "blue", "green") # colors_omics <- get_colors(mcia_results, color_pal = colors_omics) colors_omics <- get_colors(mcia_results) vis_load_plot(mcia_results, axes = c(1, 4), color_pal = colors_omics) ## ----part-2-factor-1, fig.dim = c(8, 3)--------------------------------------- # generate the visualizations all_pos_1_vis <- vis_load_ord(mcia_results, omic = "all", factor = 1, absolute = FALSE, descending = TRUE) mrna_pos_1_vis <- vis_load_ord(mcia_results, omic = "mrna", factor = 1, absolute = FALSE, descending = TRUE) ggpubr::ggarrange(all_pos_1_vis, mrna_pos_1_vis) ## ----part-2-factor-1-orderings------------------------------------------------ # obtain the loadings as plotted above all_pos_1 <- ord_loadings(mcia_results, omic = "all", factor = 1, absolute = FALSE, descending = TRUE) mrna_pos_1 <- ord_loadings(mcia_results, omic = "mrna", factor = 1, absolute = FALSE, descending = TRUE) ## ----part-2-factor-1-orderings-1---------------------------------------------- # visualize the tabular data all_pos_1[1:3, ] ## ----part-2-factor-1-orderings-2---------------------------------------------- mrna_pos_1[1:3, ] ## ----part-2-factor-2, fig.dim = c(8, 3)--------------------------------------- # define the loadings all_abs_2 <- vis_load_ord(mcia_results, omic = "all", factor = 2, absolute = TRUE, descending = TRUE) prot_abs_2 <- vis_load_ord(mcia_results, omic = "prot", factor = 2, absolute = TRUE, descending = TRUE) ggpubr::ggarrange(all_abs_2, prot_abs_2) ## ----part-2-factor-2-orderings------------------------------------------------ # obtain the loadings as plotted above all_abs_2_vis <- ord_loadings(mcia_results, omic = "all", factor = 2, absolute = TRUE, descending = TRUE) prot_abs_2_vis <- ord_loadings(mcia_results, omic = "prot", factor = 2, absolute = TRUE, descending = TRUE) ## ----part-2-factor-2-orderings-1---------------------------------------------- # visualize the tabular data all_abs_2_vis[1:3, ] ## ----part-2-factor-2-orderings-2---------------------------------------------- prot_abs_2_vis[1:3, ] ## ----part-2-factor-4, fig.dim = c(10, 4)-------------------------------------- # visualization all_4_plot <- vis_load_ord(mcia_results, omic = "all", factor = 4, absolute = FALSE, descending = TRUE, n_feat = 60) all_4_plot ## ----part-2-factor-4-orderings, fig.dim = c(10, 4)---------------------------- # visualize the tabular data all_4 <- ord_loadings(mcia_results, omic = "all", factor = 4, absolute = FALSE, descending = TRUE) ## ----part-2-pathways, echo = TRUE, message = FALSE, warning = FALSE, results = "hide"---- # extract mRNA global loadings mrna_gfscores <- nmb_get_gl(mcia_results) mrna_rows <- str_detect(row.names(mrna_gfscores), "_mrna") mrna_gfscores <- mrna_gfscores[mrna_rows, ] # rename rows to contain HUGO based gene symbols row.names(mrna_gfscores) <- str_remove(rownames(mrna_gfscores), "_[0-9]*_.*") # load pathway data path.database <- "https://data.broadinstitute.org/gsea-msigdb/msigdb/release/6.2/c2.cp.reactome.v6.2.symbols.gmt" pathways <- fgsea::gmtPathways(gmt.file = path.database) # generate the GSEA report geneset_report <- gsea_report(metagenes = mrna_gfscores, path.database, factors = seq(1, 3), pval.thr = 0.05, nproc = 2) ## ----include = FALSE---------------------------------------------------------- # # Apply scientific notation to min_pval geneset_report[[1]]$min_pval <- sprintf("%.2e", geneset_report[[1]]$min_pval) ## ----part-2-genesets---------------------------------------------------------- geneset_report[[1]] ## ----part-2-gsea-for-factor-3, echo = TRUE, message = FALSE, warning = FALSE, results = "hide"---- # re-running GSEA factor3_paths <- fgseaMultilevel(pathways, stats = mrna_gfscores[, 3], nPermSimple = 10000, minSize = 15, nproc = 2, maxSize = 500) ## ----include=FALSE------------------------------------------------------------ # Order by adjusted p value factor3_paths <- factor3_paths[order(factor3_paths$padj), ] # Apply scientific notation to padj to_sci <- function(x) return(sprintf("%.2e", x)) factor3_paths$padj <- vapply(factor3_paths$padj, to_sci, character(1)) # Clean up pathway name clean_pathway_name <- function(x) { replaced_value <- str_replace_all(x , "_", " ") if (nchar(replaced_value) > 49) { substring_value <- substr(replaced_value, 1, 50) return(paste0(substring_value, "...")) } else { return(replaced_value) } } factor3_paths[, "pathway"] <- apply(X = factor3_paths[, "pathway"], MARGIN = 1, FUN = clean_pathway_name) ## ----part-2-gsea-for-factor-3-viz--------------------------------------------- head(factor3_paths[, c("pathway", "padj")]) ## ----part-2-significant------------------------------------------------------- # extracting REACTOME_CELL_CYCLE, the most significant gene set sig_path3 <- factor3_paths[min(factor3_paths$padj) == factor3_paths$padj, ][1, ] sig_path3$leadingEdge[[1]][1:10] ## ----session-info------------------------------------------------------------- sessionInfo()