% % NOTE -- ONLY EDIT RBGL.Rnw!!! % RBGL.tex file will get overwritten. % %\VignetteIndexEntry{RBGL Overview} %\VignetteDepends{graph} %\VignetteKeywords{Graphs} %\VignettePackage{RBGL} \documentclass[12pt]{article} \usepackage{amsmath} \usepackage[authoryear,round]{natbib} \usepackage{hyperref} \textwidth=6.2in \textheight=8.5in %\parskip=.3cm \oddsidemargin=.1in \evensidemargin=.1in \headheight=-.3in \newcommand{\scscst}{\scriptscriptstyle} \newcommand{\scst}{\scriptstyle} \newcommand\Rpackage[1]{{\textsf{#1}\index{#1 (package)}}} \newcommand\RpackageNoindex[1]{{\textsf{#1}}} \newcommand\Rclass[1]{{\textit{#1}\index{#1 (class)}}} \newcommand\Rfunction[1]{{{\small\texttt{#1}}\index{#1 (function)}}} \newcommand\Rmethod[1]{{\small\texttt{#1}}} \newcommand\Rcommand[1]{{{\small\texttt{#1}}\index{#1 (function)}}} \newcommand\Rfunarg[1]{{\small\texttt{#1}}} \newcommand\Robject[1]{{\small\texttt{#1}}} %\setkeys{Gin}{width=0.55\textwidth} \bibliographystyle{plainnat} \begin{document} <>= library(RBGL) library(Rgraphviz) library(XML) @ \title{{\it RBGL}: R interface to boost graph library} \author{L. Long, VJ Carey, and R. Gentleman} \maketitle \begin{quotation} {\it Summary}. The \Rpackage{RBGL} package is primarily an interface from R to the Boost Graph Library (BGL). It includes some graph algorithms built on top of those from BGL and some algorithms independent of BGL. \end{quotation} \tableofcontents \section{Basic notations/Preliminaries} \subsection{Basics Notations} We use the following notation: {\em G}: a graph, represented as G = (V, E); {\em V} = {v1, v2, ..., vn}: a set of vertices (or nodes); {\em E} = {e1, e2, ..., em}: a set of edges with ei = [vj, vk], with vj, vk are in V; {\em W} = {w1, w2, ..., wm}: a set of weights of the edges, i.e., wi is the weight on edge ei. A {\em walk} is a sequence of vertices {v1, v2, ..., vk} such that for all i, [vi, vi+1] in {\em E}. A {\em path} is a walk without repeated vertices. A {\em cycle} is a path that begins and ends at the same vertice. A {\em directed} graph is a graph with direction assigned to its edges, therefore, [vj, vk] != [vk, vj]. A {\em directed acyclic graph (DAG)} is a directed graph with no directed cycle. An {\em in-degree} of vertex v is the total number of edges [u, v] in E; an {\em out-degree} of v is the total number of edges [v, u] in {\em E}. A network {\em N} is a directed graph {\em G} with (a) a source {\em s} whose in-degree is 0, (b) a sink {\em t} whose out-degree is 0, and (c) a {\em capacity} for each edge in a network. A {\em flow} in {\em N} assigns a value on each edge that doesn't exceed its capacity, all the internal vertices have the same incoming flow as the outgoing flow, {\em s} has outgoing flow only, {\em t} has incoming flow only. \subsection{Examples in use} We are going to use the following graphs repeatedly in the examples. <>= con <- file(system.file("XML/bfsex.gxl", package="RBGL")) bf <- fromGXL(con) close(con) @ <>= plot(bf, main="a) Breath-First Search Example") @ <>= con <- file(system.file("XML/dfsex.gxl", package="RBGL")) df <- fromGXL(con) close(con) @ <>= plot(df, main="b) Depth-First Search Example") @ <>= con <- file(system.file("XML/dijkex.gxl", package="RBGL")) dijk <- fromGXL(con) close(con) @ <>= plot(dijk, main="c) Dijkstra's Example") @ <>= con <- file(system.file("XML/conn.gxl", package="RBGL")) coex <- fromGXL(con) close(con) @ <>= plot(coex, main="d) Coex Example") @ <>= con <- file(system.file("XML/conn2.gxl", package="RBGL")) coex2 <- fromGXL(con) close(con) @ <>= plot(coex2, main="e) Coex2 Example") @ <>= con <- file(system.file("XML/conn2iso.gxl", package="RBGL")) coex2i <- fromGXL(con) close(con) @ <>= plot(coex2i, main="f) Coex2 Isomorphism Example") @ <>= con <- file(system.file("XML/kmstEx.gxl", package="RBGL")) km <- fromGXL(con) close(con) @ <>= plot(km, main="g) Kruskal MST Example") @ <>= con <- file(system.file("XML/biconn.gxl", package="RBGL")) bicoex <- fromGXL(con) close(con) @ <>= plot(bicoex, main="h) Biconnected Component Example") @ <>= con <- file(system.file("XML/ospf.gxl", package="RBGL")) ospf <- fromGXL(con) close(con) @ <>= plot(ospf, main="i) Ospf Example") @ <>= con <- file(system.file("dot/joh.gxl", package="RBGL")) joh <- fromGXL(con) close(con) @ <>= plot(joh, main="j) joh Example") @ <>= con <- file(system.file("XML/hcs.gxl", package="RBGL")) hcs <- fromGXL(con) close(con) @ <>= plot(hcs, main="k) HCS Example") @ <>= con <- file(system.file("XML/snacliqueex.gxl", package="RBGL")) kclex <- fromGXL(con) close(con) @ <>= plot(kclex, main="l) kCliques Example") @ <>= con <- file(system.file("XML/snacoreex.gxl", package="RBGL")) kcoex <- fromGXL(con) close(con) @ <>= plot(kcoex, main="m) kCores Example") @ \begin{figure}[tp] \begin{center} \begin{tabular}{cc} \includegraphics[width=0.49\textwidth]{RBGL-figbf} & \includegraphics[width=0.49\textwidth]{RBGL-figdf} \\ \includegraphics[width=0.49\textwidth]{RBGL-figdijk} & \includegraphics[width=0.49\textwidth]{RBGL-figcoex} \\ \end{tabular} \end{center} \caption{\label{fig:graphex}% The example graphs (I). } \end{figure} \begin{figure}[tp] \begin{center} \begin{tabular}{cc} \includegraphics[width=0.49\textwidth]{RBGL-figcoex2} & \includegraphics[width=0.49\textwidth]{RBGL-figcoex2i} \\ \includegraphics[width=0.49\textwidth]{RBGL-figkmst} & \includegraphics[width=0.49\textwidth]{RBGL-figbico} \\ \end{tabular} \end{center} \caption{\label{fig:graphex}% The example graphs (II). } \end{figure} \begin{figure}[tp] \begin{center} \begin{tabular}{cc} \includegraphics[width=0.49\textwidth]{RBGL-fighcs} & \includegraphics[width=0.49\textwidth]{RBGL-figkclex} \\ \includegraphics[width=0.49\textwidth]{RBGL-figkcoex} \\ \end{tabular} \end{center} \caption{\label{fig:graphex}% The example graphs (III). } \end{figure} \section{Working with the Bioconductor {\tt graph} class} An example object representing file dependencies is included, as shown in Figure \ref{fdpic}. <>= data(FileDep) FileDep @ \begin{figure} <>= z <- plot(FileDep) @ %\includegraphics{filedep} \caption{File dependency digraph example from Boost library.} \label{fdpic} \end{figure} \section{Algorithms from BGL} \subsection{Depth First Search} The \Rfunction{dfs} function returns two vectors of node names of discovery and finish order in a depth-first-search (DFS), starting at the given vertex. <>= print(dfs.res <- dfs(df, "y")) @ In this example, DFS starts with {\em y}, reaches {\em x} and {\em v}; DFS restarts from {\em w}, reaches {\em z}; DFS restarts from {\em u}; at this point, all the vertices in the graph are visited once and only once. You could see the search order in the figure. <>= plot(df, main="a) DFS Example") @ <>= dfsNattrs <- makeNodeAttrs(df) dfsNattrs$label[dfs.res$discovered] <- 1:numNodes(df) plot(df, nodeAttrs=dfsNattrs, main="b) DFS Example with search order") @ \begin{figure}[tp] \begin{center} \begin{tabular}{cc} \includegraphics[width=0.49\textwidth]{RBGL-figdfs1}& \includegraphics[width=0.49\textwidth]{RBGL-figdfs2} \\ \end{tabular} \end{center} \caption{\label{fig:dfsex}% a) The graph for depth-first-search example. b) The graph for depth-first-search example, showing search orders.} \end{figure} \subsection{Breadth First Search} The \Rfunction{bfs} function returns a vector of node names of discovery order in a breadth-first search (BFS), starting at the given vertex. <>= print(bfs.res <- bfs(bf,"s")) @ In this example, BFS starts from vertex {\em s}, reaches {\em w}; from {\em w} BFS reaches {\em r}, {\em t} and {\em x}; from {\em r} BFS reaches {\em v}; from {\em t} BFS reaches {\em u}; from {\em x} BFS reaches {\em y}; at this time, BFS visits all the vertices in the graph once and only once. <>= plot(bf, main="a) BFS Example") @ <>= bfsNattrs <- makeNodeAttrs(bf) bfsNattrs$label[bfs.res] <- 1:numNodes(bf) plot(bf, nodeAttrs=bfsNattrs, main="b) BFS Example with search order") @ \begin{figure}[tp] \begin{center} \begin{tabular}{cc} \includegraphics[width=0.49\textwidth]{RBGL-figbfs1}& \includegraphics[width=0.49\textwidth]{RBGL-figbfs2} \\ \end{tabular} \end{center} \caption{\label{fig:bfsex}% a) The graph for breadth-first-search example. b) The graph for breadth-first-search example, showing search orders.} \end{figure} \subsection{Shortest paths} Edge weights play a major role in shortest-path problems. The weight of an edge in a graph could represent the relationship between the two vertices, such as distance, probability, etc. TO-BE-FINALIZED: Our knowledge of such a relationship between two vertices is: (1) we know there is an edge and there is a measured value on it; (2) we know there is an edge but there is NO measured value on it; (3) we know there is NO edge; (4) we DO NOT know if there is an edge. Corresponding edge weights are: case 1: measured value; case 2: \Robject{NA}; case 3: \Robject{Inf}; case 4: TO-BE-DETERMINED When there is a loop of negative weight between two vertices, the distance between them is \Robject{-Inf}. %%FIXME: yes, but I think not available is different than you are %%using it here. Vince seemed to have in mind the idea that there %%would be some subject matter knowledge that a node exists, but it %%has not yet been measured. The shortest path problem is to find a path between two vertices where the sum of all the edge weights on this path is minimum. There are two sets of algorithms available: (1) find shortest paths between a single vertex, say, {\em source s}, and all other vertices, i.e., {\em V-s}, available algorithms are: {\em Dijkstra's}, {\em Bellman Ford's} and {\em DAG}, and (2) find shortest paths between all pairs of vertices, available algorithms are: {\em Johnson's} and {\em Floyd Warshall's}. %%FIXME: I think it would be helpful to first list the possibilities, %%maybe with a brief description of what they do, say in a table, then %%have more complete examples in the sections. Otherwise, it is hard %%for a reader to see what we have. {\em Dijkstra's algorithm } is for the single-source shortest-paths problem on graphs (directed or undirected) with non-negative weights on edges. If all the edges have the same weight, use depth-first-search instead. <>= nodes(dijk) edgeWeights(dijk) dijkstra.sp(dijk) @ The function \Rfunction{dijkstra.sp} finds the shortest paths from A, which is the first node in the node list - default source, to all other vertices in the graph: {\em B, C, D, E}, shown in the {\em distances} part. The {\em penult} shows TO-BE-FILLED-IN. For instance, edge {\em A->C} exists and carries a weight of 1, so the shortest path from {\em A} to {\em C} is 1; the shortest path from {\em A} to {\em B} goes through {\em A->C->D->E->B} and has weight of 6 (1+3+1+1). %%the use of penult is unfortunate. This is very hard for non-native %%speakers and should have been penultimate, if anything. But mostly %%it needs a good description of what it is and what it can be used %%for --- Vince? <>= nodes(ospf)[6] dijkstra.sp(ospf,nodes(ospf)[6]) sp.between(ospf, "RT6", "RT1") @ The first part of this example finds the shortest paths from {\em start RT6} to all the other vertices in the graph, and the second part finds the shortest path between two vertices: {\em RT6} and {\em RT1}. \begin{figure} @ <>= z <- plot(ospf) @ \caption{Network example from BGL.} \end{figure} {\em Bellman-Ford's algorithm} is for the single-source shortest-paths problem on graphs (directed or undirected) with both positive and negative edge weights. The default source is the first entry in the list of nodes in the graph. <>= dd <- coex2 nodes(dd) bellman.ford.sp(dd) bellman.ford.sp(dd,nodes(dd)[2]) @ The first \Rfunction{bellman.ford.sp} returns the shortest paths from {\em start A}, which is the first vertex on the node list, to all other vertices. The second call shows the shortest paths from {\em start B} to all other vertices, since there is no path from {\em B} to {\em A}, the {\em distance} between them is \Robject{Inf}. The {\em DAG algorithm} is for the single-source shortest-paths problem on a weighted, directed acyclic graph (DAG), which is more efficient for DAG than both Dijkstra's and Bellman-Ford's algorithms. When all the edges have the same weight, use depth-first-search instead. <>= dd <- coex2 dag.sp(dd) dag.sp(dd,nodes(dd)[2]) @ It's easy to see that {\em conn2.gxl} doesn't contain any cycle, so we could use function \Rfunction{dag.sp} on it. The first example finds the shortest paths from the {\em start A} to all other vertices. The second example finds the shortest paths from {\em start B} to all other vertices, since no path goes from {\em B} to {\em A}, the distance is {\em Inf}. {\em Johnson's algorithm} finds the shortest path between every pair of vertices in a sparse graph. Its time complexity is {\it O(V E log V)}. <>= zz <- joh edgeWeights(zz) johnson.all.pairs.sp(zz) @ \begin{figure} <>= z <- plot(zz) @ \caption{Example Johnson-all-pairs-shortest-paths example} \end{figure} This example uses a graph with negative edge weights. The shortest paths between all pairs of vertices are presented in the matrix, entry [i, j] gives the distance from vertex {\em i} to vertex {\em j}. For example, the shortest path from vertex {\em c} to vertex {\em d} is of length 5; the shortest path from vertex {\em a} to vertex {\em e} is of length -4, since edge {\em a->e} is available and of distance -4; the shortest distance from {\em a} to {\em c} is -3. {\em Floyd-Warshall's algorithm} finds the shortest path between every pair of vertices in a dense graph. <>= floyd.warshall.all.pairs.sp(coex) @ All edge distances are assumed to be 1, if not given. Since the graph is undirected, the distance matrix is symmetric, for example, distance from {\em C} to {\em G} is the same as that from {\em G} to {\em C}. \subsection{Minimum spanning tree} Minimum-spanning-tree (MST) problem is to find a subset of edges that connect all the vertices, contains no cycles and have the minimum weight sum. There are two algorithms available: {\em Kruskal's algorithm} and {\em Prim's algorithm}. Both are for undirected graphs with weighted edges, and both return a list of edges, weights and nodes determining MST. The \Rfunction{mstree.kruskal} function finds the MST by Kruskal's algorithm. <>= mstree.kruskal(km) @ This graph is treated as undirected graph with corresponding weights. MST consists of 4 edges, {\em A->C, D->E, E->A, B->D}, each is of weight 1. The \Rfunction{mstree.prim} function finds the MST by Prim's algorithm. <>= mstree.prim(coex2) @ The graph is treated as undirected graph with default weight 1. MST consists of 7 edges, {\em A->B, A->C, A->D, C->E, D->H, E->F, C->G}. \subsection{Connected components } There are several algorithms available for this group of problems. A {\em connected component} of an undirected graph is a subgraph that for any two vertices in this subgraph, {\em u} and {\em v}, there's a path from {\em u} to {\em v}. The \Rfunction{connectedComp} function computes the connected components in an undirected graph. <>= km1 <- km km1 <- graph::addNode(c("F","G","H"), km1) km1 <- addEdge("G", "H", km1, 1) km1 <- addEdge("H", "G", km1, 1) connectedComp(ugraph(km1)) @ <>= plot(km1, main="Modified Kruskal MST example") @ \begin{figure}[tp] \begin{center} \begin{tabular}{cc} \includegraphics[width=0.49\textwidth]{RBGL-figkmst} & \includegraphics[width=0.49\textwidth]{RBGL-figkm1} \\ \end{tabular} \end{center} \caption{\label{fig:graphex}% Kruskal MST examples. } \end{figure} The original graph has one connected component. After we add three vertices, {\em F, G, H} and an edge {\em G-H}, make the graph {\em undirected}, the modified graph has three connected components now. A {\em strongly connected component} of a directed graph is a connected subgraph that for every pair of vertices in this subgraph, {\em u} and {\em v}, there are both a path from {\em u} to {\em v} and a path from {\em v} to {\em u}. The \Rfunction{strongComp} function computes the strongly connected components in a directed graph. <>= km2 <- km km2 <- graph::addNode(c("F","G","H"), km2) km2 <- addEdge("G", "H", km2, 1) km2 <- addEdge("H", "G", km2, 1) strongComp(km2) @ After adding three vertices, {\em F, G, H} and an edge {\em G-H}, there are three strong components in the graph now. A {\em biconnected} graph is a connected graph that removal of any single vertex doesn't disconnect it. If the removal of a vertex increases the number of components in a graph, this vertex is call an {\em articulation point}. The \Rfunction{biConnComp} function computes the biconnected components in an undirected graph. The \Rfunction{articulationPoints} function finds all the articulation points in an undirected graph. <>= biConnComp(bicoex) articulationPoints(bicoex) @ \begin{figure} <>= z <- plot(bicoex) @ \caption{Biconnected components example from Boost library.} \end{figure} There are 4 biconnected components in the example: one with vertices {\em B, C, D} and edges {\em B-C, C-D, B-D} labeled 0, one with vertices {\em A, B, E, F} and edges {\em A-B, B-E, E-F, A-F} labeled 1, one with vertices {\em G, H, I} and edges {\em G-I, G-H, I-H} labeled 2, and one with vertices {\em A, G} and edges {\em A-G} labeled 3. There are 3 articulation points in the example: {\em A, B, G}. It's easy to see removing any one of them will result in more connected components. When you {\em add} edges to an undirected graph and want to get updated information on the connected components, you could use the following functions: \Rfunction{init.incremental.components} function to initialize the process; after adding edges to the graph, use \Rfunction{incremental.components} function to update the information on the connected components; use \Rfunction{same.component} function to find out if two vertices are in the same connected component. Currently, only one incremental graph is allowed at any given time. To start on a new graph, you need to call \Rfunction{init.incremental.components} first. <>= jcoex <- join(coex, hcs) x <- init.incremental.components(jcoex) incremental.components(jcoex) same.component(jcoex, "A", "F") same.component(jcoex, "A", "A1") jcoex <- addEdge("A", "A1", jcoex) x <- init.incremental.components(jcoex) incremental.components(jcoex) same.component(jcoex, "A", "A1") @ \begin{figure} <>= z <- plot(jcoex) @ \caption{Example on incremental components: a graph connecting coex and hcs.} \end{figure} In the first part of this example, we join two separate graphs together, the resulting graph contains two connected components. Vertices {\em A} and {\em F} are in the same connected component, while vertices {\em A} and {\em A1} are not in the same connected component. In the second part of the example, we add an edge connecting {\em A} and {\em X}, which effectively connects the two subgraphs, we have only one connected component left, which consists of all the vertices from the two original graphs, {\em A} and {\em A1} are in the same connected component now. \subsection{Maximum Flow} The functions, \Rfunction{edmonds.karp.max.flow} and \Rfunction{push.relabel.max.flow} are available to find the maximum flow between source and sink. <>= edgeWeights(dijk) edmonds.karp.max.flow(dijk, "B", "D") push.relabel.max.flow(dijk, "C", "B") @ Call to \Rfunction{edmonds.karp.max.flow} finds the maximum flow of 2 from {\em B} to {\em D}: one part of flow 1 is {\em B -> D} directly, another part of flow 1 is {\em B -> E -> A -> C -> D}. Call to \Rfunction{push.relabel.max.flow} find the maximum flow of 8 from {\em C} to {\em B}: one part of flow 7 is {\em C -> B} directly, another part of flow 1 is {\em C -> D -> E -> B}. You can see the flow on each edge in the output, and each is no more than the capacity of the edge. \subsection{Sparse Matrix Ordering} There are three functions available in this category: \Rfunction{cuthill.mckee.ordering}, \Rfunction{minDegreeOrdering} and \Rfunction{sloan.ordering}. {\em Cuthill-McKee's algorithm} tries to reduce the bandwidth of a graph by renumbering its vertices. The outputs are the vertices in the new ordering and reverse ordering. {\em Minimum degree Ordering} is one approach that tries to reduce fill-ins in matrix reordering, which turns a system of equations {\em A x = b} to {\em (P A PT)(P x) = P b}. {\em Sloan Ordering} tries to reduce the profile and wavefront of a graph by renumbering its vertices. <>= dijk1 <- ugraph(dijk) cuthill.mckee.ordering(dijk1) minDegreeOrdering(dijk1) sloan.ordering(dijk1) @ TODO: EXPLAIN THESE OUTPUT. \subsection{Edge connectivity and minimum disconnecting set} %%FIXME: I am a bit confused about the relationship between the stuff %%here, and the cutsets and betweenness centrality stuff? Is there %%anything that should be commented on? For a single connected undirected graph, function {\em edgeConnectivity} calculates the minimum number of edges that have to be removed to create two disconnected components. No edge weight is taken into account and the output is the edges that need to be removed. This is very similar to the {\em minCut} algorithm, which takes the edge weights into account when removing edges and outputs the vertices on the two disconnected components. <>= edgeConnectivity(coex) @ Mimimum of two edges must be removed to create two disconnected components: edges {\em D-E} and {\em D-H}. \subsection{Topological sort} The \Rfunction{tsort} function will return the names of vertices from a DAG in topological sort order. <>= tsort(FileDep) @ Note that if the input graph is not a DAG, BGL {\tt topological\_sort} will check this and throw 'not a dag'. This is crudely captured in the interface (a message is written to the console and zeroes are returned). <>= FD2 <- FileDep # now introduce a cycle FD2 <- addEdge(from="bar_o", to="dax_h", FD2) tsort(FD2) @ \subsection{Layout} If you want to simply draw a graph, you should consider using package {\em Rgraphviz}. Package {\em Rgraphviz} provides a lot more functionalities in graph layout, you can use it to do the actual layouts: a. "neato" uses Kamada-Kawai algorithm to make spring model layout, b. "fdp" uses Fruchterman-Reingold algorithm to make spring model layout, c. "circo" does circular layout. See Figure~\ref{fig:graphlayout} for an example. <>= # library(Rgraphviz) plot(coex, "neato") @ <>= z <- plot(coex, "neato") @ The following functions are interfaces to those in BGL, they only calculate the (x, y)-coordinates of the vertices in the graph. The actual layout is achieved with additional function, we provide an example on how to do so. Following functions are available: The \Rfunction{randomGraphLayout} function puts the vertices randomly on a plane; The \Rfunction{circleLayout} function puts the vertices as vertices of a regular polygon; The \Rfunction{kamadaKawaiSpringLayout} function is for connected, undirected graphs, it treats the edges as springs and tries to minimize the energy of the whole system; The \Rfunction{fruchtermanReingoldForceDirectedLayout} function is for unweighted, undirected, possibly disconnected graphs, it treats the edges as forces that pull vertices together, no-edges as forces that push vertices apart, vertices move to a position as environment changes; initial positions of the vertices are set randomly by calling \Rfunction{randomGraphLayout}. Notice that the choice of "width" and "height" values has dramatic impact on the performance. <>= randomGraphLayout(coex) circleLayout(coex) kamadaKawaiSpringLayout(coex) fruchtermanReingoldForceDirectedLayout(coex, 10, 10) @ Outputs are the (x, y)-coordinates of the vertices in the layout. If you want to draw the graphs based on the calculated coordinates, you need an additional function. The following is an example. <>= crudeGraphPlot <- function(g, alg=circleLayout, ...) { # # the alg parameter is a function that computes the # layout of g, returning it as a list of length 1 # with two rows: top row is x coordinates, bottom is # y coordinates, and node names are used as colnames # the ... are passed to segments() # layout <- alg(g) plot( layout[1,], layout[2,], pch=nodes(g), axes=FALSE, xlab="", ylab="", main=substitute(g), cex=1.4 ) ee <- edges(g) src <- names(ee) ds <- function(nn1, nn2, lob) segments(lob[1,nn1], lob[2,nn1], lob[1,nn2], lob[2,nn2], ...) for (s in src) sapply(ee[[s]], function(x) ds(s, x, layout)) invisible(NULL) } crudeGraphPlot(coex) crudeGraphPlot(coex, alg=kamadaKawaiSpringLayout, col="green") @ <>= crudeGraphPlot(coex) @ <>= crudeGraphPlot(coex, alg=kamadaKawaiSpringLayout, col="green") @ \begin{figure}[tp] \begin{center} \begin{tabular}{cc} \includegraphics[width=0.49\textwidth]{RBGL-figneato} & \includegraphics[width=0.49\textwidth]{RBGL-figlayout1} \\ \includegraphics[width=0.49\textwidth]{RBGL-figlayout2} \\ \end{tabular} \end{center} \caption{\label{fig:graphlayout}% a) plot(coex, neato). b) crudeGraphPlot(coex). c) crudeGraphPlot(coex, alg=kamadaKawaiSpringLayout, col="green"). } \end{figure} Call crudeGraphPlot(coex) gives the default circular layout on your graphics device. Call crudeGraphPlot(coex, alg=kamada.kawai.spring.layout, col="green") shows you the spring layout with green edges, see Figure~\ref{fig:graphlayout}. \subsection{Isomorphism} The \Rfunction{isomorphism} function determines if two graphs are isomorphism, i.e., determines if there is a one-to-one mapping {\em f} of vertices from one graph {\em g1} to the other {\em g2} such that edge {\em u -> v} is in {\em E(g1)} iff edge {\em f(u) -> f(v)} exists and is in {\em E(g2)}. <>= isomorphism(dijk, coex2) isomorphism(coex2i, coex2) @ \begin{figure} <>= z <- plot(coex2i) @ \caption{Example conn2i} \end{figure} The function handles both directed and undirected graphs. There are more vertices in graph {\em conn2} than {\em dijkstra}, so it's impossible to find a one-to-one mapping between them. One the other hand, graph {\em conn2i} is basically the same graph as {\em conn2} except the vertices have different names, so they are isomorphism. \subsection{Vertex Coloring} The \Rfunction{sequential.vertex.coloring} function assigns colors, as numbers 0, 1, 2, ..., to vertices in a graph so that two vertices connected by an edge are of different colors. It does not guarantee that minimum number of colors is used, and the result depends on the input ordering of the vertices in the graph. <>= sequential.vertex.coloring(coex) @ We need 4 colors for the vertices of this graph, one color scheme is to give color 0 to vertices {\em A, E}, color 1 to vertices {\em B, H}, color 2 to vertices {\em C, F} and color 3 to vertices {\em D, G}. \subsection{Transitive Closure} Directed graphs can be used to represent relations, $R$, on a finite set of objects, V. For example, the objects could be integers and the relation could be less than. An edge exists in the graph, between two nodes, $(u,v)$ if the $(u,v) \in R$. A \textit{transitive digraph} is a digraph whose corresponding relation is transitive. That is, if there is an edge $(u, v)$ and an edge $(v, w)$, then there must be an edge $(u, w)$. Finally, the transitive closure, $R^*$ of an arbitrary relation $R$ is the smallest transitive relation that contains $R$. Finally, if $D$ is the directed graph that represents the relation $R$, then the directed graph $D^*$ that represents the relation $R^*$ is called the transitive closure of $D$. The function \Rfunction{transitive.closure} returns the transitive closure of a directed graph. In the code below we compute the transitive closure for the example graph \Robject{dijk} and then plot both the original graph, and the transitive closure in Figure~\ref{fig:graphTC}. %%FIXME: surely this should return a graph of the same class as the %%input, not some list? Not sure this is the most elegant, but it %%seems to work - someone should check that the output is actually the %%transitive closure - I do not much like the self-loops, but maybe %%they are part of it. TO-BE-FINALIZED: The transitive closure {\em tc} of a graph {\em g} is a graph that contains the same set of vertices as {\em g}, and there is an edge from {\em u} to {\em v} in {\em tc} iff there is a path from {\em u} to {\em v} in {\em g}. <>= dijk.tc = transitive.closure(dijk) @ <>= plot(dijk.tc, main="b) Transitive closure") @ \begin{figure}[tp] \begin{center} \begin{tabular}{cc} \includegraphics[width=0.49\textwidth]{RBGL-figdijk} & \includegraphics[width=0.49\textwidth]{RBGL-figdijkTC} \\ \end{tabular} \end{center} \caption{\label{fig:graphTC}% a) The graph for Dijkstra's example. b) The transitive closure of the graph in panel a)} \end{figure} In this graph, you can reach each and every vertex from any vertex. \subsection{Wavefront, Profiles} TODO: EXPLAIN THESE TERMS The following functions are available: \Rfunction{ith.wavefront}, \Rfunction{maxWavefront}, \Rfunction{aver.wavefront} and \Rfunction{rms.wavefront}. <>= ss <- 1 ith.wavefront(dijk, ss) maxWavefront(dijk) aver.wavefront(dijk) rms.wavefront(dijk) @ TODO: EXPLAIN THESE RESULTS \subsection{Betweenness Centrality and Clustering} {\em Betweenness centrality} of a vertex (or an edge) measures its importance in a graph, i.e., among all the shortest paths between every pair of vertices in the graph, how many of them have to go through this vertex (or edge). {\em Relative} betweenness centrality is calculated by scaling the {\em absolute} betweenness centrality by factor {\em 2/((n-1)(n-2))}, where {\em n} is the number of vertices in the graph. The \Rfunction{brandes.betweenness.centrality} function implements Brandes' algorithm in calculating betweenness centrality. The \Rfunction{betweenness.centrality.clustering} function implements clustering in a graph based on edge betweenness centrality. TODO: EXPLAIN MORE <>= brandes.betweenness.centrality(coex) betweenness.centrality.clustering(coex, 0.1, TRUE) @ TODO: EXPLAIN THE RESULTS \section{Algorithms built on RBGL} \subsection{Min-Cut} Given an undirected graph G=(V, E) of a single connected component, a {\em cut} is a partition of the set of vertices into two non-empty subsets S and V-S, a {\em cost} is the weight sum of edges that are incident on one vertex in S and one vertex in V-S. The min-cut problem is to find a cut (S, V-S) of minimum cost. For simplicity, subset {\em S} is the smaller of the two. <>= minCut(coex) @ Currently all edge weights are assumed to be 1, minimum cut is of weight 2, it will partition the graph into two subsets: subset {\em A, B, C, D} and subset {\em E, H, F, G}. \subsection{highlyConnSG} A graph {\em G} with {\em n} vertices is highly connected if its connectivity {\em k(G) > n/2}. Function {\em highlyConnSG} partitions a graph into a set of highly connected subgraphs, by using minimum-cut algorithm repeatedly. To improve performance, it takes special care of singletons, low degree vertices and merges clusters. <>= highlyConnSG(coex) highlyConnSG(hcs) @ In graph {\em conn}, two highly-connected-subgraphs are found: subgraph with vertices {\em A, B, C, D} and subgraph with vertices {\em E, H, F, G}. In graph {\em hcs}, 3 highly-connected-subgraphs are found: subgraph with vertices {\em A1, A2, A3, A4, A5}, subgraph with vertices {\em B1, B2, B3, B4} and subgraph with vertices {\em X, Y, Z}. \section{Algorithms independent from RBGL} \subsection{maxClique} A {\em clique} is a complete subgraph, i.e., there is an edge between every pair of vertices. {\em Maximum Clique} problem is to find the largest clique in a graph. This problem is NP-complete, which means it cannot be solved by any known polynomial algorithm. Function {\em maxClique} implements the algorithm from {\em Finding all cliques of an undirected graph}, by C. Bron and J. Kerbosch (CACM, Sept 1973, Vol 16, No. 9.), which finds all the cliques in a graph. <>= maxClique(coex) maxClique(hcs) @ In graph {\em conn}, 3 cliques are found: clique with vertices {\em D, B, C, A}, clique with vertices {\em D, E, H} and clique with vertices {\em F, E, H, H}. In graph {\em hcs}, 10 cliques are found. For instance, vertices {\em A2, A4, A3} form a clique, vertices {\em B1, Y} form a clique. \subsection{is.triangulated} A graph is {\em triangulated} if all cycles of length 4 or more have a chord. The \Rfunction{is.triangulated} function returns TRUE or FALSE, accordingly. We implemented the following algorithm from {\em Combinatorial Optimization: algorithms and complexity} (p. 403) by C. H. Papadimitriou, K. Steiglitz: G is chordal iff either G is an empty graph, or there is a {\em v} in {\em V} such that (i) the neighborhood of {\em v}, i.e., {\em v} and its adjacent vertices, forms a clique, and (ii) recursively, {\em G-v} is chordal. <>= is.triangulated(coex) is.triangulated(hcs) @ \subsection{separates} Function {\em separates} determines if a subset of vertices separates two other subsets of vertices, and returns TRUE or FALSE, accordingly. <>= separates("B", "A", "E", km) separates("B", "A", "C", km) @ \subsection{kCores} A {\em k-core} in a graph is a subgraph where each vertex is adjacent to at least {\tt k} other vertices in the same subgraph. Function {\em kCores} finds all the k-cores in a graph. It returns the core numbers for all the nodes in the graph. When the given graph is directed, you can choose whether in-degree, out-degree or both should be considered. The k-core of a graph is not a necessarily connected subgraph. If i > j, the i-core of a graph contains the j-core of the same graph. The implementation is based on the algorithm by V. Batagelj and M. Zaversnik, 2002. <>= kCores(kcoex) kcoex2 <- coex2 kCores(kcoex2) kCores(kcoex2, "in") kCores(kcoex2, "out") g1 <- addEdge("C", "B", kcoex2) kCores(g1, "in") g2 <- addEdge("C", "A", kcoex2) kCores(g2, "out") @ \begin{figure} <>= z <- plot(kcoex) @ \caption{K-cores Example.} \label{fdkcore} \end{figure} The example on directed graph, "conn2", turns out to be a waterfall-like graph. If we order the nodes as: A, B, C, D, E, F, H, G, all the edges go in the same direction, i.e., i -> j, i < j. Let's consider in-degree-only case: A has no in-edge so it is 0-core; after you eliminate A, no in-edge to B, so B is 0-core; continue this, we could see that there's no subset of nodes that each and every single node has 1 in-degree. Therefore, they are all of 0-core. For out-degree-only case: G has no out-edge, so it's 0-core; after eliminating G, F has no out-edge, so F is 0-core; continue this process, we could see that there's no subset of nodes that each and every single node has 1 out-edge. Therefore, they are all of 0-core. If we add edge(s) to break the waterfall-like property, {\em C->B}, {\em C->A}, separately, we could see the changes in the core numbers that are consistant with the analysis above. \subsection{kCliques} In social network analysis, a k-cliques is a maximum subgraph that the shortest distance between any two nodes is no more than k. Function {\em kCliques} finds all the k-cliques in an undirected graph (k = 1, ..., N, where N is the length of the longest shortest-path). It returns all the k-cliques. Let D be a matrix, D[i][j] is the shortest path from node i to node j. Algorithm is outlined as following. o. use Johnson's algorithm to fill D; let N = max(D[i][j]) for all i, j; o. each edge is a 1-clique by itself; o. for k = 2, ..., N, try to expand each (k-1)-clique to k-clique: o. consider a (k-1)-clique the current k-clique KC; o. repeat the following: if for all nodes j in KC, D[v][j] <= k, add node v to KC; o. eliminate duplicates; o. the whole graph is N-clique. <>= kCliques(kclex) @ \begin{figure} <>= z <- plot(kclex) @ \caption{K-cliques Example.} \label{fdkcliques} \end{figure} \end{document}