## ----setup, include=FALSE-------------------------------------------------- knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE) ## ----strTSE, echo=FALSE, fig.cap= "The structure of the TreeSummarizedExperiment class."---- knitr::include_graphics("tse.png") ## -------------------------------------------------------------------------- suppressPackageStartupMessages({ library(TreeSummarizedExperiment) library(S4Vectors) library(ggtree) library(ape)}) ## -------------------------------------------------------------------------- # assays data assay_data <- rbind(rep(0, 4), matrix(1:16, nrow = 4)) colnames(assay_data) <- paste(rep(LETTERS[1:2], each = 2), rep(1:2, 2), sep = "_") rownames(assay_data) <- paste("entity", seq_len(5), sep = "") assay_data ## -------------------------------------------------------------------------- # row data row_data <- DataFrame(var1 = sample(letters[1:2], 5, replace = TRUE), var2 = sample(c(TRUE, FALSE), 5, replace = TRUE), row.names = rownames(assay_data)) row_data # column data col_data <- DataFrame(gg = c(1, 2, 3, 3), group = rep(LETTERS[1:2], each = 2), row.names = colnames(assay_data)) col_data ## -------------------------------------------------------------------------- # Toy tree 1 set.seed(1) row_tree <- rtree(5) class(row_tree) # Toy tree 2 set.seed(4) col_tree <- rtree(4) col_tree$tip.label <- colnames(assay_data) col_tree$node.label <- c("All", "GroupA", "GroupB") ## -------------------------------------------------------------------------- class(row_tree) str(row_tree) ## ----rtree, fig.cap="\\label{rtree} The structure of the row tree"--------- # Visualize the row tree ggtree(row_tree, size = 2) + geom_text2(aes(label = node), color = "darkblue", hjust = -0.5, vjust = 0.7, size = 6) + geom_text2(aes(label = label), color = "darkorange", hjust = -0.1, vjust = -0.7, size = 6) ## ----ctree, fig.cap="\\label{ctree} The structure of the column tree"------ # Visualize the column tree ggtree(col_tree, size = 2) + geom_text2(aes(label = node), color = "darkblue", hjust = -0.5, vjust = 0.7, size = 6) + geom_text2(aes(label = label), color = "darkorange", hjust = -0.1, vjust = -0.7, size = 6) ## -------------------------------------------------------------------------- # provide the node labels in rowNodeLab node_lab <- row_tree$tip.label row_tse <- TreeSummarizedExperiment(assays = list(assay_data), rowData = row_data, colData = col_data, rowTree = row_tree, rowNodeLab = node_lab) ## -------------------------------------------------------------------------- row_tse ## -------------------------------------------------------------------------- all(colnames(assay_data) %in% c(col_tree$tip.label, col_tree$node.label)) both_tse <- TreeSummarizedExperiment(assays = list(assay_data), rowData = row_data, colData = col_data, rowTree = row_tree, rowNodeLab = node_lab, colTree = col_tree) ## -------------------------------------------------------------------------- both_tse ## -------------------------------------------------------------------------- # to get the first table in the assays (count <- assays(both_tse)[[1]]) ## -------------------------------------------------------------------------- # to get row data rowData(both_tse) ## -------------------------------------------------------------------------- # to get column data colData(both_tse) ## -------------------------------------------------------------------------- # to get metadata: it's empty here metadata(both_tse) ## -------------------------------------------------------------------------- (rLink <- rowLinks(both_tse)) ## -------------------------------------------------------------------------- class(rLink) showClass("LinkDataFrame") ## -------------------------------------------------------------------------- nrow(rLink) == nrow(both_tse) ## -------------------------------------------------------------------------- (cLink <- colLinks(both_tse)) nrow(cLink) == ncol(both_tse) ## -------------------------------------------------------------------------- colTree(row_tse) colLinks(row_tse) ## -------------------------------------------------------------------------- sub_tse <- both_tse[1:2, 1] sub_tse ## -------------------------------------------------------------------------- # The first four columns are from rowLinks data and the others from rowData cbind(rowLinks(sub_tse), rowData(sub_tse)) ## -------------------------------------------------------------------------- # The first four columns are from colLinks data and the others from colData cbind(colLinks(sub_tse), colData(sub_tse)) ## -------------------------------------------------------------------------- # use node labels to specify colLevel aggCol <- aggValue(x = both_tse, colLevel = c("GroupA", "GroupB"), FUN = sum) # or use node numbers to specify colLevel aggCol <- aggValue(x = both_tse, colLevel = c(6, 7), FUN = sum) ## -------------------------------------------------------------------------- assays(aggCol)[[1]] ## -------------------------------------------------------------------------- # before aggregation colData(both_tse) # after aggregation colData(aggCol) ## -------------------------------------------------------------------------- # the link data is updated colLinks(aggCol) ## -------------------------------------------------------------------------- agg_row <- aggValue(x = both_tse, rowLevel = 7:9, FUN = sum) ## -------------------------------------------------------------------------- assays(agg_row)[[1]] ## -------------------------------------------------------------------------- rowLinks(agg_row) ## -------------------------------------------------------------------------- agg_both <- aggValue(x = both_tse, colLevel = c(6, 7), rowLevel = 7:9, FUN = sum) ## -------------------------------------------------------------------------- assays(agg_both)[[1]] ## -------------------------------------------------------------------------- # The toy taxonomic table taxa <- data.frame(Kindom = rep("A", 5), Phylum = c("B1", rep("B2", 4)), Class = c("C1", "C2", "C3", "C3", NA), OTU = c("D1", "D2", "D3", "D4", NA)) # convert to a phylo tree taxa_tree <- toTree(data = taxa, cache = FALSE) ggtree(taxa_tree)+ geom_text2(aes(label = node), color = "darkblue", hjust = -0.5, vjust = 0.7, size = 6) + geom_text2(aes(label = label), color = "darkorange", hjust = -0.1, vjust = -0.7, size = 6) + geom_point2() ## -------------------------------------------------------------------------- # construct a TreeSummarizedExperiment object taxa_tse <- TreeSummarizedExperiment(assays = list(assay_data), rowData = row_data, rowTree = taxa_tree, rowNodeLab = taxa_tree$tip.label) ## -------------------------------------------------------------------------- # specify the level taxa_lab <- c(taxa_tree$tip.label, taxa_tree$node.label) ii <- startsWith(taxa_lab, "Phylum:") (l1 <- taxa_lab[ii]) # aggregate agg_taxa <- aggValue(x = taxa_tse, rowLevel = l1, FUN = sum) ## -------------------------------------------------------------------------- assays(agg_taxa)[[1]] ## -------------------------------------------------------------------------- rowData(agg_taxa) ## -------------------------------------------------------------------------- # specify the level l2 <- c("Class:C3", "Phylum:B1") # aggregate agg_any <- aggValue(x = taxa_tse, rowLevel = l2, FUN = sum) ## -------------------------------------------------------------------------- assays(agg_any)[[1]] ## -------------------------------------------------------------------------- rowData(agg_any) ## -------------------------------------------------------------------------- ggtree(tinyTree, branch.length = "none") + geom_text2(aes(label = label), hjust = -0.3) + geom_text2(aes(label = node), vjust = -0.8, hjust = -0.3, color = 'blue') ## -------------------------------------------------------------------------- printNode(tree = tinyTree, type = "all") ## -------------------------------------------------------------------------- # The number of leaves countLeaf(tree = tinyTree) # The number of nodes (leaf nodes and internal nodes) countNode(tree = tinyTree) ## -------------------------------------------------------------------------- transNode(tree = tinyTree, node = c(12, 1, 4)) ## -------------------------------------------------------------------------- transNode(tree = tinyTree, node = c("t4", "Node_18")) ## -------------------------------------------------------------------------- # only the leaf nodes findOS(tree = tinyTree, node = 17, only.leaf = TRUE) ## -------------------------------------------------------------------------- # all descendant nodes findOS(tree = tinyTree, node = 17, only.leaf = FALSE) ## -------------------------------------------------------------------------- # node = 5, node = "t4" are the same node findSibling(tree = tinyTree, node = 5) findSibling(tree = tinyTree, node = "t4") ## -------------------------------------------------------------------------- shareNode(tree = tinyTree, node = c(5, 6)) ## -------------------------------------------------------------------------- isLeaf(tree = tinyTree, node = 5) isLeaf(tree = tinyTree, node = 17) ## -------------------------------------------------------------------------- distNode(tree = tinyTree, node = c(1, 5)) ## -------------------------------------------------------------------------- NT1 <- pruneTree(tree = tinyTree, rmLeaf = c(4, 5), mergeSingle = TRUE) ggtree(NT1, branch.length = "none") + geom_text2(aes(label = label), color = "darkorange", hjust = -0.1, vjust = -0.7) + geom_point2() NT2 <- pruneTree(tree = tinyTree, rmLeaf = c(4, 5), mergeSingle = FALSE) ggtree(NT2, branch.length = "none") + geom_text2(aes(label = label), color = "darkorange", hjust = -0.1, vjust = -0.7) + geom_point2() ## -------------------------------------------------------------------------- matTree(tree = tinyTree) ## -------------------------------------------------------------------------- # dat: a TreeSummarizedExperiment rmRows <- function(dat) { # calculate the total counts of each row count <- assays(dat)[[1]] tot <- apply(count, 1, sum) # find the row with zero in all columns ind <- which(tot == 0) # remove those rows out <- dat[-ind, ] return(out) } (rte <- rmRows(dat = both_tse)) rowLinks(rte) ## -------------------------------------------------------------------------- updateRowTree <- function(tse, dropLeaf) { ## -------------- new tree: drop leaves ---------- oldTree <- rowTree(tse) newTree <- ape::drop.tip(phy = oldTree, tip = dropLeaf) ## -------------- update the row link ---------- # track the tree track <- trackNode(oldTree) track <- ape::drop.tip(phy = track, tip = dropLeaf) # row links rowL <- rowLinks(tse) rowL <- DataFrame(rowL) # update the row links: # 1. use the alias label to track and updates the nodeNum # 2. the nodeLab should be updated based on the new tree using the new # nodeNum # 3. lastly, update the nodeLab_alias rowL$nodeNum <- transNode(tree = track, node = rowL$nodeLab_alias, message = FALSE) rowL$nodeLab <- transNode(tree = newTree, node = rowL$nodeNum, use.alias = FALSE, message = FALSE) rowL$nodeLab_alias <- transNode(tree = newTree, node = rowL$nodeNum, use.alias = TRUE, message = FALSE) rowL$isLeaf <- isLeaf(tree = newTree, node = rowL$nodeNum) rowNL <- as(rowL, "LinkDataFrame") ## update the row tree and links newDat <- BiocGenerics:::replaceSlots(tse, rowLinks = rowNL, rowTree = list(phylo = newTree)) return(newDat) } ## -------------------------------------------------------------------------- # find the mismatch between the rows of the 'assays' table and the leaves of the # tree row_tree <- rowTree(rte) row_link <- rowLinks(rte) leaf_tree <- printNode(tree = row_tree,type = "leaf")$nodeNum leaf_data <- row_link$nodeNum[row_link$isLeaf] leaf_rm <- setdiff(leaf_tree, leaf_data) ntse <- updateRowTree(tse = rte, dropLeaf = leaf_rm) ## -------------------------------------------------------------------------- ntse rowLinks(ntse) ## -------------------------------------------------------------------------- sessionInfo()