---
title: "Tree Visualization"
author: "Guangchuang Yu and Tommy Tsan-Yuk Lam\\ School of Public Health, The University of Hong Kong"
date: "`r Sys.Date()`"
bibliography: ggtree.bib
biblio-style: apalike
output:
  prettydoc::html_pretty:
    toc: true
    theme: cayman
    highlight: github
  pdf_document:
    toc: true
vignette: >
  %\VignetteIndexEntry{02 Tree Visualization}
  %\VignetteEngine{knitr::rmarkdown}
  %\usepackage[utf8]{inputenc}
---

```{r style, echo=FALSE, results="asis", message=FALSE}
knitr::opts_chunk$set(tidy = FALSE, message = FALSE)
```

```{r echo=FALSE, results="hide", message=FALSE}
library("ape")
library("grid")
library("ggplot2")
library("cowplot")
library("treeio")
library("ggtree")

CRANpkg <- function (pkg) {
    cran <- "https://CRAN.R-project.org/package"
    fmt <- "[%s](%s=%s)"
    sprintf(fmt, pkg, cran, pkg)
}

Biocpkg <- function (pkg) {
    sprintf("[%s](http://bioconductor.org/packages/%s)", pkg, pkg)
}
```

To view a phylogenetic tree, we first need to parse the tree file into *R*. The `r Biocpkg('ggtree')` [@yu_ggtree:_2017] package supports many file formats via the `r Biocpkg('treeio')` package, including output files of commonly used software packages in evolutionary biology. For more details, please refer to the [treeio vignette](https://bioconductor.org/packages/devel/bioc/vignettes/treeio/inst/doc/Importer.html).

```{r}
library("treeio")
library("ggtree")

nwk <- system.file("extdata", "sample.nwk", package="treeio")
tree <- read.tree(nwk)
```

# Viewing a phylogenetic tree with *ggtree*

The `r Biocpkg('ggtree')` package extends the `r CRANpkg('ggplot2')` [@wickham_ggplot2_2009] package to support viewing phylogenetic trees. It implements a `geom_tree` layer for displaying a phylogenetic tree, as shown below:

```{r fig.width=3, fig.height=3, fig.align="center"}
ggplot(tree, aes(x, y)) + geom_tree() + theme_tree()
```

The function, `ggtree`, was implemented as a shortcut to visualize a tree, and it works exactly the same as shown above.

`r Biocpkg('ggtree')` takes full advantage of `r CRANpkg('ggplot2')`. For example, we can change the color, size and type of the lines as we do with `r CRANpkg('ggplot2')`.

```{r fig.width=3, fig.height=3, fig.align="center"}
ggtree(tree, color="firebrick", size=1, linetype="dotted")
```

By default, the tree is viewed in ladderized form; users can set the parameter *ladderize = FALSE* to disable it.

```{r fig.width=3, fig.height=3, fig.align="center"}
ggtree(tree, ladderize=FALSE)
```

The *branch.length* is used to scale the edges; users can set the parameter *branch.length = "none"* to only view the tree topology (cladogram) or use another numerical variable to scale the tree (*e.g.* _d~N~/d~S~_, see also the [Tree Annotation](treeAnnotation.html) vignette).

```{r fig.width=3, fig.height=3, fig.align="center"}
ggtree(tree, branch.length="none")
```

# Layout

Currently, `r Biocpkg('ggtree')` supports several layouts, including:

+ rectangular (by default)
+ slanted
+ circular
+ fan

for *phylogram* (by default) and *cladogram* if the user explicitly sets *branch.length='none'*. Unrooted (equal angle and daylight methods), time-scaled and 2-dimensional layouts are also supported.

## Phylogram and Cladogram

```{r eval=F}
library(ggtree)
set.seed(2017-02-16)
tr <- rtree(50)
ggtree(tr)
ggtree(tr, layout="slanted")
ggtree(tr, layout="circular")
ggtree(tr, layout="fan", open.angle=120)
ggtree(tr, layout="equal_angle")
ggtree(tr, layout="daylight")
ggtree(tr, branch.length='none')
ggtree(tr, branch.length='none', layout='circular')
ggtree(tr, layout="daylight", branch.length = 'none')
```

```{r echo=F, fig.width=8, fig.height=8, message=FALSE}
library(ggtree)
set.seed(2017-02-16)
tr <- rtree(50)
library(cowplot)
theme_layout <- theme(plot.title = element_text(hjust = 0.5))
plot_grid(
    ggtree(tr) + ggtitle("rectangular (phylogram)")+ theme_layout,
    ggtree(tr, layout="slanted") + ggtitle("slanted (phylogram)")+theme_layout,
    ggtree(tr, layout="circular") + ggtitle("circular (phylogram)")+theme_layout,
    ggtree(tr, layout="fan", open.angle=120) + ggtitle("fan (phylogram)")+theme_layout,
    ggtree(tr, layout="equal_angle")+ ggtitle("equal angle (unrooted)")+theme_layout,
    ggtree(tr, layout="daylight")+ ggtitle("daylight (unrooted)")+theme_layout,
    ggtree(tr, branch.length='none')+ ggtitle("rectangular (cladogram)")+theme_layout,
    ggtree(tr, branch.length='none', layout='circular')+ ggtitle("circular (cladogram)")+theme_layout,
    ggtree(tr, layout="daylight", branch.length = 'none')+ ggtitle("daylight (cladogram)")+theme_layout,
    ncol=3)
```

There are also other possible layouts that can be drawn by modifying scales/coordination, for example, [reverse label of time scale](https://github.com/GuangchuangYu/ggtree/issues/87), [reproportion circular/fan tree](https://groups.google.com/d/msgid/bioc-ggtree/6db25ec7-ffb1-4776-bfe4-4a1ba239c8e8%40googlegroups.com), *etc.*

```{r eval=FALSE}
ggtree(tr) + scale_x_reverse()
ggtree(tr) + coord_flip()
ggtree(tr) + scale_x_reverse() + coord_flip()
print(ggtree(tr), newpage=TRUE, vp = grid::viewport(angle=-30, width=.9, height=.9))
ggtree(tr, layout='slanted') + coord_flip()
ggtree(tr, layout='slanted', branch.length='none') +
    coord_flip() + scale_y_reverse() +scale_x_reverse()
ggtree(tr, layout='circular') + xlim(-10, NA)
ggtree(tr) + scale_x_reverse() + coord_polar(theta='y')
ggtree(tr) + scale_x_reverse(limits=c(10, 0)) + coord_polar(theta='y')
```

```{r fig.keep='none', echo=FALSE, warning=FALSE}
tree_angle <- grid::grid.grabExpr(print(ggtree(tr), newpage=TRUE, vp = grid::viewport(angle=-30, width=.9, height=.9)))
```

```{r fig.width=8, fig.height = 8, echo=FALSE, warning=FALSE}
plot_grid(
    ggtree(tr) + scale_x_reverse(),
    ggtree(tr) + coord_flip(),
    ggtree(tr) + scale_x_reverse() + coord_flip(),
    tree_angle,
    ggtree(tr, layout='slanted') + coord_flip(),
    ggtree(tr, layout='slanted', branch.length='none') + coord_flip() + scale_y_reverse() +scale_x_reverse(),
    ggtree(tr, layout='circular') + xlim(-10, NA),
    ggtree(tr) + scale_x_reverse() + coord_polar(theta='y'),
    ggtree(tr) + scale_x_reverse(limits=c(15, 0)) + coord_polar(theta='y'),
    ncol = 3, labels = LETTERS[1:9])
```

## Time-scaled tree

A phylogenetic tree can be scaled by time (time-scaled tree) by specifying the parameter, *mrsd* (most recent sampling date).

```{r fig.width=8, fig.height=4, fig.align="center"}
tree2d <- read.beast(system.file("extdata", "twoD.tree", package="treeio"))
ggtree(tree2d, mrsd = "2014-05-01") + theme_tree2()
```

## Two dimensional tree

`r Biocpkg('ggtree')` implements two-dimensional trees. It accepts the parameter *yscale* to scale the y-axis based on the selected tree attribute. The attribute should be a numerical variable. If it is a *character*/*category* variable, users should provide a named vector that maps the variable to numeric values by passing it to the parameter *yscale_mapping*.

```{r fig.width=9, fig.height=4, fig.align="center"}
ggtree(tree2d, mrsd = "2014-05-01",
       yscale="NGS", yscale_mapping=c(N2=2, N3=3, N4=4, N5=5, N6=6, N7=7)) +
    theme_classic() + theme(axis.line.x=element_line(), axis.line.y=element_line()) +
    theme(panel.grid.major.x=element_line(color="grey20", linetype="dotted", size=.3),
          panel.grid.major.y=element_blank()) +
    scale_y_continuous(labels=paste0("N", 2:7))
```

In this example, the figure demonstrates that the quantity of __*y*__ increases along the trunk. Users can highlight the trunk with a different line size or color using the functions described in the [Tree Manipulation](treeManipulation.html) vignette.

# Displaying tree scale (evolution distance)

To show the tree scale, users can use the `geom_treescale()` layer.

```{r fig.width=4, fig.height=4, fig.align="center"}
ggtree(tree) + geom_treescale()
```

`geom_treescale()` supports the following parameters:

+ *x* and *y* for tree scale position
+ *width* for the length of the tree scale
+ *fontsize* for the size of the text
+ *linesize* for the size of the line
+ *offset* for relative position of the line and the text
+ *color* for color of the tree scale

```{r eval=F}
ggtree(tree) + geom_treescale(x=0, y=12, width=6, color='red')
ggtree(tree) + geom_treescale(fontsize=8, linesize=2, offset=-1)
```

```{r fig.width=8, fig.height=4, fig.align="center", echo=F}
plot_grid(
    ggtree(tree)+geom_treescale(x=0, y=12, width=6, color='red'),
    ggtree(tree)+geom_treescale(fontsize=8, linesize=2, offset=-1),
    ncol = 2, labels = LETTERS[1:2])
```

We can also use `theme_tree2()` to display the tree scale by adding an *x axis*.

```{r fig.width=3, fig.height=3, fig.align="center"}
ggtree(tree) + theme_tree2()
```

The tree scale is not restricted to evolution distance; `r Biocpkg('ggtree')` can re-scale the tree with other numerical variables. More details can be found in the [Tree Annotation](treeAnnotation.html) vignette.

# Displaying nodes/tips

Showing all the internal nodes and tips in the tree can be done by adding a layer of points using `geom_nodepoint`, `geom_tippoint` or `geom_point`.

```{r fig.width=3, fig.height=3, fig.align="center"}
ggtree(tree)+geom_point(aes(shape=isTip, color=isTip), size=3)
```

```{r fig.width=3, fig.height=3, fig.align="center"}
p <- ggtree(tree) + geom_nodepoint(color="#b5e521", alpha=1/4, size=10)
p + geom_tippoint(color="#FDAC4F", shape=8, size=3)
```

## Displaying labels

Users can use `geom_text` or `geom_label` to display the node (if available) and tip labels simultaneously or `geom_tiplab` to only display tip labels:

```{r fig.width=3, fig.height=3, warning=FALSE, fig.align="center"}
p + geom_tiplab(size=3, color="purple")
```

`geom_tiplab` not only supports using the *text* or *label* geom to display labels, it also supports the *image* geom to label tips with image files. A corresponding geom, `geom_nodelab`, is also provided for displaying node labels. For details on labeling nodes with images, please refer to the vignette, [Annotating phylogenetic tree with images](https://guangchuangyu.github.io/software/ggtree/vignettes/ggtree-ggimage.html).

For *circular* and *unrooted* layouts, `r Biocpkg('ggtree')` supports rotating node labels according to the angles of the branches.

```{r fig.width=6, fig.height=6, warning=FALSE, fig.align="center"}
ggtree(tree, layout="circular") + geom_tiplab(aes(angle=angle), color='blue')
```

To make it more readable for the human eye, `r Biocpkg('ggtree')` provides `geom_tiplab2` for the `circular` layout (see post [1](https://groups.google.com/forum/?utm_medium=email&utm_source=footer#!topic/bioc-ggtree/o35PV3iHO-0) and [2](https://groups.google.com/forum/#!topic/bioc-ggtree/p42R5l8J-14)).

```{r fig.width=6, fig.height=6, warning=FALSE, fig.align="center"}
ggtree(tree, layout="circular") + geom_tiplab2(color='blue')
```

By default, the positions are based on the node positions; we can change them to be based on the middle of the branch/edge.

```{r fig.width=4, fig.height=3, warning=FALSE, fig.align="center"}
p + geom_tiplab(aes(x=branch), size=3, color="purple", vjust=-0.3)
```

Positioning labels at the middle of the branch is very useful when annotating the transition from a parent node to a child node.

# Update tree view with a new tree

In the previous example, we have a _`p`_ object that stores the tree view of 13 tips and internal nodes highlighted with specific colored big dots. If users want to apply this pattern (we can imagine a more complex one) to a new tree, they don't need to build the tree step by step. `ggtree` provides an operator, _`%<%`_, for applying the visualization pattern to a new tree.

For example, the pattern in the _`p`_ object will be applied to a new tree with 50 tips as shown below:

```{r fig.width=3, fig.height=3, fig.align="center"}
p %<% rtree(50)
```

# Theme

`theme_tree()` defines a totally blank canvas, while _`theme_tree2()`_ adds phylogenetic distance (via x-axis). Both themes accept a parameter, _`bgcolor`_, that defines the background color. Users can pass any [theme components](http://ggplot2.tidyverse.org/reference/theme.html) to the `theme_tree()` function to modify them.

```{r eval=F}
ggtree(rtree(30), color="red") + theme_tree("steelblue")
ggtree(rtree(20), color="white") + theme_tree("black")
```

```{r fig.width=8, fig.height=3, fig.align="center", echo=F}
cowplot::plot_grid(
    ggtree(rtree(30), color="red") + theme_tree("steelblue"),
    ggtree(rtree(20), color="purple") + theme_tree("black"),
    ncol=2)
```

# Visualize a list of trees

`ggtree` supports `multiPhylo` objects, and a list of trees can be viewed simultaneously.

```{r fig.width=12, fig.height=4}
trees <- lapply(c(10, 20, 40), rtree)
class(trees) <- "multiPhylo"
ggtree(trees) + facet_wrap(~.id, scale="free") + geom_tiplab()
```

One hundred bootstrap trees can also be viewed simultaneously.

```{r fig.width=20, fig.height=20}
btrees <- read.tree(system.file("extdata/RAxML", "RAxML_bootstrap.H3", package="treeio"))
ggtree(btrees) + facet_wrap(~.id, ncol=10)
```

Another way to view the bootstrap trees is to merge them together to form a density tree. We can add a layer of the best tree on the top of the density tree.

```{r}
p <- ggtree(btrees, layout="rectangular", color="lightblue", alpha=.3)

best_tree <- read.tree(system.file("extdata/RAxML", "RAxML_bipartitionsBranchLabels.H3", package="treeio"))
df <- fortify(best_tree, branch.length = 'none')
p+geom_tree(data=df, color='firebrick')
```

# Rescale tree

Most of the phylogenetic trees are scaled by evolutionary distance (substitution/site). In `ggtree`, users can re-scale a phylogenetic tree by any numerical variable inferred by evolutionary analysis (e.g. *dN/dS*).

```{r fig.width=10, fig.height=5}
library("treeio")
beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
beast_tree <- read.beast(beast_file)
beast_tree
p1 <- ggtree(beast_tree, mrsd='2013-01-01') + theme_tree2() +
    ggtitle("Divergence time")
p2 <- ggtree(beast_tree, branch.length = 'rate') + theme_tree2() +
    ggtitle("Substitution rate")
library(cowplot)
plot_grid(p1, p2, ncol=2)
```

```{r fig.width=10, fig.height=5}
mlcfile <- system.file("extdata/PAML_Codeml", "mlc", package="treeio")
mlc_tree <- read.codeml_mlc(mlcfile)
p1 <- ggtree(mlc_tree) + theme_tree2() +
    ggtitle("nucleotide substitutions per codon")
p2 <- ggtree(mlc_tree, branch.length='dN_vs_dS') + theme_tree2() +
    ggtitle("dN/dS tree")
plot_grid(p1, p2, ncol=2)
```

In addition to specifying `branch.length` in tree visualization, users can change the branch length stored in a tree object by using the `rescale_tree` function.

```{r}
beast_tree2 <- rescale_tree(beast_tree, branch.length = 'rate')
ggtree(beast_tree2) + theme_tree2()
```

# Zoom on a portion of tree

`ggtree` provides the _`gzoom`_ function, which is similar to the _`zoom`_ function provided in `ape`. This function simultaneously plots a whole phylogenetic tree and a portion of it. It aims at exploring very large trees.

```{r fig.width=9, fig.height=5, fig.align="center"}
library("ape")
data(chiroptera)
library("ggtree")
gzoom(chiroptera, grep("Plecotus", chiroptera$tip.label))
```

Zooming in on a selected clade of a tree that was already annotated with `ggtree` is also supported.

```{r fig.width=9, fig.height=5, message=FALSE, warning=FALSE}
groupInfo <- split(chiroptera$tip.label, gsub("_\\w+", "", chiroptera$tip.label))
chiroptera <- groupOTU(chiroptera, groupInfo)
p <- ggtree(chiroptera, aes(color=group)) + geom_tiplab() + xlim(NA, 23)
gzoom(p, grep("Plecotus", chiroptera$tip.label), xmax_adjust=2)
```

# Color tree

In `ggtree`, coloring a phylogenetic tree is easy: use `aes(color=VAR)` to map the color of the tree based on a specific variable (both numeric and categorical variables are supported).

```{r fig.width=5, fig.height=5}
ggtree(beast_tree, aes(color=rate)) +
    scale_color_continuous(low='darkgreen', high='red') +
    theme(legend.position="right")
```

Users can use any feature (if available), including clade posterior and *dN/dS* _etc._, to scale the color of the tree.

# References