## ----setup, echo=FALSE, message=FALSE-----------------------------------------
library(ggplot2)
library(knitr)
library(rTRM)
library(org.Mm.eg.db)

## ----fig.wide=TRUE, echo=FALSE, fig.cap="Workflow for the identification of TRMs. Steps 1-3 can be performed with standard Bioconductor approaches. rTRM implements a method to perform step 4."----
knitr::include_graphics("workflow.pdf")

## ----fig.small=TRUE, fig.cap = "Identification of a TRM 1from a test network (left). In the resulting TRM (right) dark blue indicates the target node light blue are query nodes and white nodes are bridge nodes"----
# load the rTRM package
library(rTRM)

# load network example.
load(system.file(package = "rTRM", "extra/example.rda"))

# plot network
plot(g, vertex.size = 20, vertex.label.cex = .8, layout = layout.graphopt)

# define target and query nodes:
target <- "N6"
query <- c("N7", "N12", "N28")

# find TRM:
s <- findTRM(g, target = target, query = query, method = "nsa", max.bridge = 1)

# annotate nodes:
V(s)$color <- "white"
V(s)[ name %in% query]$color <- "steelblue2"
V(s)[ name %in% target]$color <- "steelblue4"

# plot:
plot(s,vertex.size=20,vertex.label.cex=.8)

## -----------------------------------------------------------------------------
pwm <- getMatrices()
head(pwm, 1)

## -----------------------------------------------------------------------------
ann <- getAnnotations()
head(ann)

## -----------------------------------------------------------------------------
map <- getMaps()
head(map)

## -----------------------------------------------------------------------------
o <- getOrthologs(organism = "mouse")
head(o)

## -----------------------------------------------------------------------------
getOrthologFromMatrix("MA0009.1", organism="human")
getOrthologFromMatrix("MA0009.1", organism="mouse")

## -----------------------------------------------------------------------------
# check statistics about the network.
biogrid_mm()
# load mouse PPI network:
data(biogrid_mm)

## ----eval=FALSE---------------------------------------------------------------
#  # obtain dataset.
#  db <- getBiogridData() # retrieves latest release.
#  # db = getBiogridData("3.2.96") # to get a specific release.
#  
#  # check release:
#  db$release
#  db$data
#  
#  # process PPI data for different organisms (currently supported human and mouse):
#  biogrid_hs <- processBiogrid(db, org = "human")
#  biogrid_mm <- processBiogrid(db, org = "mouse")

## ----eval=FALSE---------------------------------------------------------------
#  library(PSICQUIC)
#  psicquic <- PSICQUIC()
#  providers(psicquic)
#  
#  # obtain BioGrid human PPIs (as data.frame):
#  tbl <- interactions(psicquic, species="9606",provider="BioGrid")
#  
#  # the target and source node information needs to be polished (i.e. must be Entrez gene id only)
#  biogrid_hs <- data.frame(source=tbl$A,target=tbl$B)
#  biogrid_hs$source <- sub(".*locuslink:(.*)\\|BIOGRID:.*","\\1", biogrid_hs$source)
#  biogrid_hs$target <- sub(".*locuslink:(.*)\\|BIOGRID:.*","\\1", biogrid_hs$target)
#  
#  # create graph.
#  library(igraph)
#  biogrid_hs <- graph.data.frame(biogrid_hs,directed=FALSE)
#  biogrid_hs <- simplify(biogrid_hs)
#  
#  # annotate with symbols.
#  library(org.Hs.eg.db)
#  V(biogrid_hs)$label <- select(org.Hs.eg.db,keys=V(biogrid_hs)$name,columns=c("SYMBOL"))$SYMBOL

## -----------------------------------------------------------------------------
# read motif enrichment results.
motif_file <- system.file("extra/sox2_motif_list.rda", package = "rTRM")
load(motif_file)
length(motif_list)
head(motif_list)

## -----------------------------------------------------------------------------
# get the corresponding gene.
tfs_list <- getOrthologFromMatrix(motif_list, organism = "mouse")
tfs_list <- unique(unlist(tfs_list, use.names = FALSE))
length(tfs_list)
head(tfs_list)

## -----------------------------------------------------------------------------
# load expression data.
eg_esc_file <- system.file("extra/ESC-expressed.txt", package = "rTRM")
eg_esc <- scan(eg_esc_file, what = "")
length(eg_esc)
head(eg_esc)

tfs_list_esc <- tfs_list[tfs_list %in% eg_esc]
length(tfs_list_esc)
head(tfs_list_esc)

## -----------------------------------------------------------------------------
# load and process PPI data.
biogrid_mm()
data(biogrid_mm)
ppi <- biogrid_mm
vcount(ppi)
ecount(ppi)

# remove outliers.
f <- c("Ubc", "Sumo1", "Sumo2", "Sumo3")
f <- select(org.Mm.eg.db, keys = f, columns = "ENTREZID", keytype = "SYMBOL")$ENTREZID
f

ppi <- removeVertices(ppi, f)
vcount(ppi)
ecount(ppi)

# filter by expression.
ppi_esc <- induced.subgraph(ppi, V(ppi)[ name %in% eg_esc ])
vcount(ppi_esc)
ecount(ppi_esc)

# ensure a single component.
ppi_esc <- getLargestComp(ppi_esc)
vcount(ppi_esc)
ecount(ppi_esc)

## -----------------------------------------------------------------------------
# define target.
target <- select(org.Mm.eg.db,keys="Sox2",columns="ENTREZID",keytype="SYMBOL")$ENTREZID
target

# find TRM.
s <- findTRM(ppi_esc, target, tfs_list_esc, method = "nsa", max.bridge = 1)
vcount(s)
ecount(s)

## ----fig.small=TRUE, fig.cap = "Sox2 specific TRM in ESCs."-------------------
# generate layout (order by cluster, then label)
cl <- getConcentricList(s, target, tfs_list_esc)
l <- layout.concentric(s, cl, order = "label")

# plot TRM.
plotTRM(s, layout = l, vertex.cex = 15, label.cex = .8)
plotTRMlegend(s, title = "ESC Sox2 TRM", cex = .8)

## -----------------------------------------------------------------------------
library(rTRM)
library(BSgenome.Mmusculus.UCSC.mm8.masked) # Sox2 peaks found against mm8
library(PWMEnrich)
registerCoresPWMEnrich(1) # register number of cores for parallelization in PWMEnrich
library(MotifDb)

# select mouse PWMs:
sel.mm <- values(MotifDb)$organism %in% c("Mmusculus")
pwm.mm <- MotifDb[sel.mm]

## -----------------------------------------------------------------------------
# generate logn background model of PWMs:
p <- as.list(pwm.mm)
p <- lapply(p, function(x) round(x * 100))
p <- lapply(p, function(x) t(apply(x, 1, as.integer)))

## ----eval=FALSE---------------------------------------------------------------
#  pwm_logn <- makeBackground(p, Mmusculus, type = "logn")

## ----echo=FALSE---------------------------------------------------------------
load(system.file("extra/pwm_mm_logn.rda", package = "rTRM"))

## -----------------------------------------------------------------------------
sox2_bed <- read.table(system.file("extra/ESC_Sox2_peaks.txt", package = "rTRM"))

colnames(sox2_bed) <- c("chr", "start", "end")

sox2_seq <- getSequencesFromGenome(sox2_bed, Mmusculus, append.id="Sox2")

# PWMEnrich throws an error if the sequences are shorter than the motifs so we filter those sequences.
min.width <- max(sapply(p, ncol))
sox2_seq_filter <- sox2_seq[width(sox2_seq) >= min.width]

## ----eval=FALSE---------------------------------------------------------------
#  # find enrichment:
#  sox2_enr <- motifEnrichment(sox2_seq_filter, pwms=pwm_logn, group.only=TRUE)

## ----echo=FALSE---------------------------------------------------------------
load(system.file("extra/sox2_enr.rda", package = "rTRM"))

## ----fig.small=TRUE, fig.cap="Density of log2(raw.score) for group. The selected cutoff is indicated with a red line."----
res <- groupReport(sox2_enr)

plot(density(res$raw.score),main="",log="x",xlab="log(raw.score)")
abline(v=log2(5),col="red")
mtext(text="log2(5)",at=log2(5),side=3,cex=.8,col="red")

res.gene <- unique(values(MotifDb[res$id[res$raw.score > 5]])$geneId)
res.gene <- unique(na.omit(res.gene))

## ----fig.small=TRUE, fig.cap="Sox2 TRM identified using PWMEnrich for the motif enrichment."----
data(biogrid_mm)
ppi <- biogrid_mm
vcount(ppi)
ecount(ppi)

f <- c("Ubc", "Sumo1", "Sumo2", "Sumo3")
f <- select(org.Mm.eg.db,keys=f,columns="ENTREZID",keytype="SYMBOL")$ENTREZID
ppi <- removeVertices(ppi, f)
vcount(ppi)
ecount(ppi)

# filter by expression.
eg_esc <- scan(system.file("extra/ESC-expressed.txt", package = "rTRM"), what = "")
ppi_esc <- induced.subgraph(ppi, V(ppi)[ name %in% eg_esc ])
vcount(ppi_esc)
ecount(ppi_esc)

# ensure a single component.
ppi_esc <- getLargestComp(ppi_esc)
vcount(ppi_esc)
ecount(ppi_esc)

sox2.gene <- select(org.Mm.eg.db,keys="Sox2",columns="ENTREZID",keytype="SYMBOL")$ENTREZID
sox2_trm <- findTRM(ppi_esc, target=sox2.gene, query = res.gene)

cl <- getConcentricList(sox2_trm, t=sox2.gene,e=res.gene)
l <- layout.concentric(sox2_trm, concentric=cl, order="label")
plotTRM(sox2_trm, layout = l, vertex.cex = 15, label.cex = .8)
plotTRMlegend(sox2_trm, title = "ESC Sox2 TRM", cex = .8)

## ----fig.small=TRUE, fig.cap="Similarity between the TRMs predicted using motif enrichment information from PWMEnrich and HOMER."----
m <- getSimilarityMatrix(list(PWMEnrich = sox2_trm, HOMER = s))
m

d <- as.data.frame.table(m)
g <- ggplot(d, aes(x = Var1, y = Var2, fill = Freq)) + 
  geom_tile() +
  scale_fill_gradient2(
    limit = c(0, 100),
    low = "white",
    mid = "darkblue",
    high = "orange",
    guide = guide_legend("similarity", reverse = TRUE),
    midpoint = 50
  ) +
  labs(x = NULL, y = NULL) +
  theme(aspect.ratio = 1,
        axis.text.x = element_text(
          angle = 90,
          vjust = .5,
          hjust = 1
        ))

## ----fig.small=TRUE, fig.width=2.5,fig.height=2.5,fig.cap="Sox2 TRM obtained with PWMEnrich workflow and layout.concentric is shown in the left. Same TRM with layout.arc is shown in the right."----
plotTRM(sox2_trm, layout = l, vertex.cex = 15, label.cex = .7)
l=layout.arc(sox2_trm,target=sox2.gene,query=res.gene)
plotTRM(sox2_trm, layout=l,vertex.cex=15,label.cex=.7)

## -----------------------------------------------------------------------------
citation(package = "rTRM")

## -----------------------------------------------------------------------------
sessionInfo()