## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = TRUE, crop = NULL ) ## ----install-pkg-------------------------------------------------------------- # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("treeclimbR") ## ----setup-------------------------------------------------------------------- suppressPackageStartupMessages({ library(TreeSummarizedExperiment) library(treeclimbR) library(ggtree) library(dplyr) library(ggplot2) }) ## ----da-load-and-visualize-data----------------------------------------------- ## Read data da_lse <- readRDS(system.file("extdata", "da_sim_100_30_18de.rds", package = "treeclimbR")) da_lse ## Generate tree visualization where true signal leaves are colored orange ## ...Find internal nodes in the subtrees where all leaves are differentially ## abundant. These will be colored orange. nds <- joinNode(tree = rowTree(da_lse), node = rownames(da_lse)[rowData(da_lse)$Signal]) br <- unlist(findDescendant(tree = rowTree(da_lse), node = nds, only.leaf = FALSE, self.include = TRUE)) df_color <- data.frame(node = showNode(tree = rowTree(da_lse), only.leaf = FALSE)) |> mutate(signal = ifelse(node %in% br, "yes", "no")) ## ...Generate tree da_fig_tree <- ggtree(tr = rowTree(da_lse), layout = "rectangular", branch.length = "none", aes(color = signal)) %<+% df_color + scale_color_manual(values = c(no = "grey", yes = "orange")) ## ...Zoom into the subtree defined by a particular node. In this case, we ## know that all true signal leaves were sampled from the subtree defined ## by a particular node (stored in metadata(da_lse)$parentNodeForSignal). da_fig_tree <- scaleClade(da_fig_tree, node = metadata(da_lse)$parentNodeForSignal, scale = 4) ## Extract count matrix and scale each row to [0, 1] count <- assay(da_lse, "counts") scale_count <- t(apply(count, 1, FUN = function(x) { xx <- x rx <- (max(xx) - min(xx)) (xx - min(xx))/max(rx, 1) })) rownames(scale_count) <- rownames(count) colnames(scale_count) <- colnames(count) ## Plot tree and heatmap of scaled counts ## ...Generate sample annotation vv <- gsub(pattern = "_.*", "", colnames(count)) names(vv) <- colnames(scale_count) anno_c <- structure(vv, names = vv) TreeHeatmap(tree = rowTree(da_lse), tree_fig = da_fig_tree, hm_data = scale_count, legend_title_hm = "Scaled\ncount", column_split = vv, rel_width = 0.6, tree_hm_gap = 0.3, column_split_label = anno_c) + scale_fill_viridis_c(option = "B") + scale_y_continuous(expand = c(0, 10)) ## ----da-aggregate------------------------------------------------------------- ## Get a list of all node IDs all_node <- showNode(tree = rowTree(da_lse), only.leaf = FALSE) ## Calculate counts for internal nodes da_tse <- aggTSE(x = da_lse, rowLevel = all_node, rowFun = sum) da_tse ## ----da-run-edger------------------------------------------------------------- ## Run differential analysis da_res <- runDA(da_tse, assay = "counts", option = "glmQL", design = model.matrix(~ group, data = colData(da_tse)), contrast = c(0, 1), filter_min_count = 0, filter_min_prop = 0, filter_min_total_count = 15) ## ----da-runda-res------------------------------------------------------------- names(da_res) class(da_res$edgeR_results) ## Nodes with too low total count da_res$nodes_drop ## ----da-node-results---------------------------------------------------------- da_tbl <- nodeResult(da_res, n = Inf, type = "DA") dim(da_tbl) head(da_tbl) ## ----da-get-cand-------------------------------------------------------------- ## Get candidates da_cand <- getCand(tree = rowTree(da_tse), score_data = da_tbl, node_column = "node", p_column = "PValue", threshold = 0.05, sign_column = "logFC", message = FALSE) ## ----da-plot-cand------------------------------------------------------------- ## All candidates names(da_cand$candidate_list) ## Nodes contained in the candidate corresponding to t = 0.03 ## This is a mix of leaves and internal nodes (da_cand_0.03 <- da_cand$candidate_list[["0.03"]]) ## Visualize candidate da_fig_tree + geom_point2(aes(subset = (node %in% da_cand_0.03)), color = "navy", size = 0.5) + labs(title = "t = 0.03") + theme(legend.position = "none", plot.title = element_text(color = "navy", size = 7, hjust = 0.5, vjust = -0.08)) ## ----da-eval-cand------------------------------------------------------------- ## Evaluate candidates da_best <- evalCand(tree = rowTree(da_tse), levels = da_cand$candidate_list, score_data = da_tbl, node_column = "node", p_column = "PValue", sign_column = "logFC") ## ----da-info-cand------------------------------------------------------------- infoCand(object = da_best) ## ----da-topnodes-------------------------------------------------------------- da_out <- topNodes(object = da_best, n = Inf, p_value = 0.05) ## ----da-plot-significant------------------------------------------------------ da_fig_tree + geom_point2(aes(subset = node %in% da_out$node), color = "red") ## ----da-fdr-tpr--------------------------------------------------------------- fdr(rowTree(da_tse), truth = rownames(da_lse)[rowData(da_lse)$Signal], found = da_out$node, only.leaf = TRUE) tpr(rowTree(da_tse), truth = rownames(da_lse)[rowData(da_lse)$Signal], found = da_out$node, only.leaf = TRUE) ## ----ds-load-and-visualize-data----------------------------------------------- ds_tse <- readRDS(system.file("extdata", "ds_sim_20_500_8de.rds", package = "treeclimbR")) ds_tse ## Assignment of cells to high-resolution clusters, samples and conditions head(colData(ds_tse)) ## Tree providing the successive aggregation of the high-resolution clusters ## into more coarse-grained ones ## Node numbers are indicated in blue, node labels in orange ggtree(colTree(ds_tse)) + geom_text2(aes(label = node), color = "darkblue", hjust = -0.5, vjust = 0.7) + geom_text2(aes(label = label), color = "darkorange", hjust = -0.1, vjust = -0.7) ## ----ds-list-truth------------------------------------------------------------ rowData(ds_tse)[rowData(ds_tse)$Signal != "no", , drop = FALSE] ## ----ds-aggregate------------------------------------------------------------- ds_se <- aggDS(TSE = ds_tse, assay = "counts", sample_id = "sample_id", group_id = "group", cluster_id = "cluster_id", FUN = sum) ds_se ## ----ds-ncells---------------------------------------------------------------- metadata(ds_se)$n_cells ## ----ds-run-edger------------------------------------------------------------- ds_res <- runDS(SE = ds_se, tree = colTree(ds_tse), option = "glmQL", design = model.matrix(~ group, data = colData(ds_se)), contrast = c(0, 1), filter_min_count = 0, filter_min_total_count = 1, filter_min_prop = 0, min_cells = 5, group_column = "group", message = FALSE) ## ----ds-runds-res------------------------------------------------------------- names(ds_res) names(ds_res$edgeR_results) ## ----ds-node-results---------------------------------------------------------- ds_tbl <- nodeResult(ds_res, type = "DS", n = Inf) dim(ds_tbl) head(ds_tbl) ## ----ds-get-cand-------------------------------------------------------------- ## Split result table by feature ds_tbl_list <- split(ds_tbl, f = ds_tbl$feature) ## Find candidates for each gene separately ds_cand_list <- lapply(seq_along(ds_tbl_list), FUN = function(x) { getCand( tree = colTree(ds_tse), t = seq(from = 0.05, to = 1, by = 0.05), score_data = ds_tbl_list[[x]], node_column = "node", p_column = "PValue", sign_column = "logFC", message = FALSE)$candidate_list }) names(ds_cand_list) <- names(ds_tbl_list) ## ----ds-eval-cand------------------------------------------------------------- ds_best <- evalCand(tree = colTree(ds_tse), type = "multiple", levels = ds_cand_list, score_data = ds_tbl_list, node_column = "node", p_column = "PValue", sign_column = "logFC", feature_column = "feature", limit_rej = 0.05, message = FALSE, use_pseudo_leaf = FALSE) ds_out <- topNodes(object = ds_best, n = Inf, p_value = 0.05) ds_out ## ----ds-check-cand------------------------------------------------------------ lapply(findDescendant(colTree(ds_tse), node = c(11, 13, 5, 9), self.include = TRUE, use.alias = FALSE, only.leaf = TRUE), function(x) convertNode(colTree(ds_tse), node = x)) ## ----ds-list-truth-again------------------------------------------------------ rowData(ds_tse)[rowData(ds_tse)$Signal != "no", , drop = FALSE] ## ----session-info------------------------------------------------------------- sessionInfo()