## ----setup,echo=FALSE----------------------------------------------------
suppressPackageStartupMessages({
library(BiocStyle)
library(TFutils)
library(org.Hs.eg.db)
library(GenomicFiles)
library(GO.db)
library(data.table)
library(knitr)
library(ggplot2)
library(data.table)
library(SummarizedExperiment)
library(BiocParallel)
data(fimoMap)
})

## ----setup2, echo=FALSE--------------------------------------------------
library(TFutils)
library(AnnotationDbi)
# Load every packaged dataset used to build the catalogs below.
data(tftColl, tftCollMap, cisbpTFcat, hocomoco.mono)

# GO-based catalog: ENTREZID/SYMBOL pairs annotated to GO:0003700 in
# org.Hs.eg.db.  Messages emitted by select() are silenced.
tfdf = suppressMessages(
    select(org.Hs.eg.db::org.Hs.eg.db,
        keys="GO:0003700",
        keytype="GO",
        columns=c("ENTREZID", "SYMBOL"))
)[, c("ENTREZID", "SYMBOL")]
TFs_GO = TFCatalog(
    name="GO.0003700",
    nativeIds=tfdf$ENTREZID,
    HGNCmap=tfdf)

# MSigDb catalog: native ids are the names of tftColl; the map is tftCollMap
# coerced to a data.frame without factor conversion.
TFs_MSIG = TFCatalog(
    name="MsigDb.TFT",
    nativeIds=names(tftColl),
    HGNCmap=data.frame(tftCollMap,stringsAsFactors=FALSE))

# CISBP catalog: native ids come from the first column of cisbpTFcat.
TFs_CISBP = TFCatalog(
    name="CISBP.info",
    nativeIds=cisbpTFcat[,1],
    HGNCmap=cisbpTFcat)

# HOCOMOCO catalog: native ids come from the first column of hocomoco.mono.
TFs_HOCO = TFCatalog(
    name="hocomoco11",
    nativeIds=hocomoco.mono[,1],
    HGNCmap=hocomoco.mono)

## ----lkupset,echo=FALSE,out.width='60%', fig.cap='Sizes of TF catalogs and of intersections based on HGNC symbols for TFs.', fig.pos='h'----
suppressPackageStartupMessages({library(UpSetR)})
# Symbols known to org.Hs.eg.db; catalog symbols outside this set are dropped.
allhg = keys(org.Hs.eg.db::org.Hs.eg.db, keytype="SYMBOL")
# Column 2 of each catalog's HGNCmap is taken to hold the HGNC symbol.
# The list order fixes the column order of the indicator matrix.
catsyms = list(
    GO=HGNCmap(TFs_GO)[,2],
    MSigDb=HGNCmap(TFs_MSIG)[,2],
    HOCO=HGNCmap(TFs_HOCO)[,2],
    CISBP=HGNCmap(TFs_CISBP)[,2])
activesym = unique(unlist(catsyms[c("GO", "HOCO", "MSigDb", "CISBP")],
    use.names=FALSE))
use = intersect(allhg, activesym)
# Indicator matrix: one row per retained symbol, one column per catalog.
# Arguments are spelled out in full (nrow/ncol) rather than relying on
# partial argument matching.
mymat = matrix(0, nrow=length(use), ncol=length(catsyms),
    dimnames=list(use, names(catsyms)))
iu = function(x) intersect(x,use)
# Flag catalog membership; na.omit guards the character row index against NA.
for (nm in names(catsyms)) {
    mymat[na.omit(iu(catsyms[[nm]])), nm] = 1
}
upset(data.frame(mymat),
    nsets=4,
    sets=c("MSigDb", "HOCO", "GO", "CISBP"),
    keep.order=TRUE,
    order.by="degree")

## ----TFclass, out.width='110%', fig.cap = 'Screenshots of AmiGO and TFClass hierarchy excerpts.',echo=FALSE----
# Embed the static image file 'AMIGOplus.png' in the rendered document.
# The path is relative, so the file is presumably expected alongside the
# document source -- confirm against the build layout.
knitr::include_graphics('AMIGOplus.png')

## ----dodo,echo=FALSE-----------------------------------------------------
library(knitr)

# Symbol-level maps for the two catalogs being compared.
cismap = HGNCmap(TFs_CISBP)
hoc = HGNCmap(TFs_HOCO)

# CISBP: one family label per HGNC symbol (first record of each symbol),
# then the ten most frequent families.
scis = split(cismap, cismap$HGNC)
uf = vapply(scis, function(rec) rec$Family_Name[1], character(1))
CISTOP = sort(table(uf), decreasing=TRUE)[seq_len(10)]

# HOCOMOCO: same tabulation, using the `TF family` column.
shoc = split(hoc, hoc$HGNC)
sfam = vapply(shoc, function(rec) rec$`TF family`[1], character(1))
HOTOP = sort(table(sfam), decreasing=TRUE)[seq_len(10)]

# Side-by-side table of family names and counts, rendered as markdown.
kable(
    data.frame(
        CISBP=names(CISTOP),
        Nc=as.numeric(CISTOP),
        HOCOMOCO=names(HOTOP),
        Nh=as.numeric(HOTOP)),
    format="markdown")

## ----lkbro---------------------------------------------------------------
# Auto-print the tftColl dataset shipped with TFutils to show its summary.
TFutils::tftColl

## ----lktft2--------------------------------------------------------------
# List the element names of tftColl that contain the substring "NFK".
grep("NFK", names(TFutils::tftColl), value=TRUE)

## ----demoredu------------------------------------------------------------
library(GenomicFiles)
# Load the fimo16 dataset into the workspace and auto-print it.
data(fimo16)
fimo16
# First rows of the column-level metadata (one row per file, presumably --
# confirm against the fimo16 documentation).
head(colData(fimo16))

## ----demoredu2, cache=TRUE-----------------------------------------------
library(BiocParallel)
# Run the file-wise reduction serially rather than on a parallel back-end.
register(SerialParam()) # important for macosx?
# Restrict the query to a single interval on chr17.
rowRanges(fimo16) = GRanges(
    seqnames="chr17",
    ranges=IRanges(start=38.077e6, end=38.084e6))
# For every file, pull the tabix records overlapping the query range.
rr = GenomicFiles::reduceByFile(fimo16,
    MAP=function(range, file) {
        scanTabix(file, param=range)
    })

## ----paraa---------------------------------------------------------------
# Parse a character vector of delimited text lines into a data.table.
# The lines are passed through `text=` so they are always treated as data.
# Pasting them into one string and passing it as `input=` makes fread read a
# single-record result (no newline present) as a file name or shell command.
asdf = function(x) data.table::fread(text=x, header=FALSE)
# One data.frame per file: column V2 as location, -log10 of column V7 as score
# (V7 is presumably the FIMO p-value -- confirm against the file format).
gg = lapply(rr, function(x) {
       hits = x[[1]][[1]]
       # A file with no records in the query range yields an empty frame
       # with the same columns instead of failing inside the parser.
       if (length(hits) == 0L) {
         return(data.frame(loc=integer(0), score=numeric(0)))
       }
       tmp = asdf(hits)
       data.frame(loc=tmp$V2, score=-log10(tmp$V7))
     })
# Label each frame with the value in column 2 of colData(fimo16) for its file.
# seq_along() avoids the c(1, 0) iteration of 1:length() on an empty list;
# rep() keeps the assignment valid for zero-row frames.
for (i in seq_along(gg)) {
  gg[[i]]$tf = rep(colData(fimo16)[i,2], nrow(gg[[i]]))
}

## ----domat---------------------------------------------------------------
# Look up each file's label (column 2 of colData) in column 2 of cisbpTFcat
# and take the matching Family_Name; unmatched labels yield NA.
matchcis = match(colData(fimo16)[,2], cisbpTFcat[,2])
famn = cisbpTFcat[matchcis,]$Family_Name
# seq_along() avoids the c(1, 0) iteration of 1:length() when gg is empty;
# rep() keeps the assignment valid when a frame has zero rows.
for (i in seq_along(gg)) {
  gg[[i]]$tffam = rep(famn[i], nrow(gg[[i]]))
}
# Stack the per-file frames into a single data.frame for plotting.
nn = do.call(rbind, gg)

## ----finish, fig.height=3.5----------------------------------------------
library(ggplot2)
# Scatter plot of score against location, grouped and coloured by tffam.
ggplot(data=nn,
    mapping=aes(x=loc, y=score, group=tffam, colour=tffam)) +
  geom_point()