## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    crop = NULL ## Related to https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html
)

## ----message = FALSE----------------------------------------------------------
library(BioPlex)
library(AnnotationDbi)
library(AnnotationHub)
library(graph)

## ----ahub, message = FALSE----------------------------------------------------
ah <- AnnotationHub::AnnotationHub()

## ----ehub, message = FALSE----------------------------------------------------
eh <- ExperimentHub::ExperimentHub()

## ----orgdb, message = FALSE---------------------------------------------------
orgdb <- AnnotationHub::query(ah, c("orgDb", "Homo sapiens"))
orgdb <- orgdb[[1]]
orgdb
keytypes(orgdb)

## ----corumCore----------------------------------------------------------------
core <- getCorum(set = "core", organism = "Human")

## ----corum2glist--------------------------------------------------------------
core.glist <- corum2graphlist(core, subunit.id.type = "UNIPROT")

## ----corum-subunit------------------------------------------------------------
has.cdk2 <- hasSubunit(core.glist, 
                       subunit = "CDK2",
                       id.type = "SYMBOL")

## ----corum-subunit2-----------------------------------------------------------
table(has.cdk2)
cdk2.glist <- core.glist[has.cdk2]
lapply(cdk2.glist, function(g) unlist(graph::nodeData(g, attr = "SYMBOL")))

## ----corum-subunit3, message = FALSE, eval = FALSE----------------------------
#  plot(cdk2.glist[[1]], main = names(cdk2.glist)[1])

## ----bioplex293T--------------------------------------------------------------
bp.293t <- getBioPlex(cell.line = "293T", version = "3.0")

## ----bpgraph------------------------------------------------------------------
bp.gr <- bioplex2graph(bp.293t)

## -----------------------------------------------------------------------------
n <- graph::nodes(cdk2.glist[[1]])
bp.sgr <- graph::subGraph(n, bp.gr)
bp.sgr

## -----------------------------------------------------------------------------
bp.gr <- BioPlex::annotatePFAM(bp.gr, orgdb)

## -----------------------------------------------------------------------------
unip2pfam <- graph::nodeData(bp.gr, graph::nodes(bp.gr), "PFAM")
pfam2unip <- stack(unip2pfam)
pfam2unip <- split(as.character(pfam2unip$ind), pfam2unip$values)
head(pfam2unip, 2)

## -----------------------------------------------------------------------------
scan.unip <- pfam2unip[["PF02023"]]
getIAPfams <- function(n) graph::nodeData(bp.gr, graph::edges(bp.gr)[[n]], "PFAM")
unip2iapfams <- lapply(scan.unip, getIAPfams)
unip2iapfams <- lapply(unip2iapfams, unlist)
names(unip2iapfams) <- scan.unip

## -----------------------------------------------------------------------------
pfam2iapfams <- unlist(unip2iapfams)
sort(table(pfam2iapfams), decreasing = TRUE)[1:5]

## ----cnv-seq------------------------------------------------------------------
cnv.hmm <- getHEK293GenomeTrack(track = "cnv.hmm", cell.line = "293T")
cnv.hmm

## ----cnv-snp------------------------------------------------------------------
cnv.snp <- getHEK293GenomeTrack(track = "cnv.snp", cell.line = "293T")
cnv.snp

## ----hg18-genes---------------------------------------------------------------
AnnotationHub::query(ah, c("TxDb", "Homo sapiens"))
txdb <- ah[["AH52257"]]
gs <- GenomicFeatures::genes(txdb)
gs

## -----------------------------------------------------------------------------
olaps <- GenomicRanges::findOverlaps(gs, cnv.snp)
joined <- gs[S4Vectors::queryHits(olaps)]
joined$score <- cnv.snp$score[S4Vectors::subjectHits(olaps)]
joined

## -----------------------------------------------------------------------------
olaps <- GenomicRanges::findOverlaps(gs, cnv.hmm)
joined2 <- gs[S4Vectors::queryHits(olaps)]
joined2$score <- cnv.hmm$score[S4Vectors::subjectHits(olaps)]
joined2

## -----------------------------------------------------------------------------
isect <- intersect(names(joined), names(joined2))
joined <- joined[isect]
joined2 <- joined2[isect]

## -----------------------------------------------------------------------------
cor(joined$score, joined2$score)
cor.test(joined$score, joined2$score)

## ----fig.width = 8, fig.height = 8--------------------------------------------
spl <- split(joined2$score, joined$score)
boxplot(spl, xlab = "SNP-inferred copy number", ylab = "sequencing-inferred copy number")
rho <- cor(joined$score, joined2$score)
rho <- paste("cor", round(rho, digits=3), sep=" = ")

p <- cor.test(joined$score, joined2$score)
p <- p$p.value
p <- "   p < 2.2e-16"

legend("topright", legend=c(rho, p))

## ----gse122425----------------------------------------------------------------
se <- getGSE122425()
se

## ----prey-expression----------------------------------------------------------
bait <- unique(bp.293t$SymbolA)
length(bait)
prey <- unique(bp.293t$SymbolB)
length(prey)

## -----------------------------------------------------------------------------
ind <- match(prey, rowData(se)$SYMBOL)
par(las = 2)
boxplot(log2(assay(se, "rpkm") + 0.5)[ind,], 
        names = se$title, 
        ylab = "log2 RPKM")

## ----prey-expression2---------------------------------------------------------
# background: how many genes in total are expressed in all three WT reps
gr0 <- rowSums(assay(se)[,1:3] > 0)
table(gr0 == 3)
# prey: expressed in all three WT reps
table(gr0[ind] == 3)
# prey: expressed in at least one WT rep
table(gr0[ind] > 0)

## ----prey-expression-ora------------------------------------------------------
exprTable <-
     matrix(c(9346, 1076, 14717, 32766),
            nrow = 2,
            dimnames = list(c("Expressed", "Not.expressed"),
                            c("In.prey.set", "Not.in.prey.set")))
exprTable

## ----prey-expression-ora2-----------------------------------------------------
fisher.test(exprTable, alternative = "greater")

## ----prey-expression-293T-perm------------------------------------------------
permgr0 <- function(gr0, nr.genes = length(prey)) 
{
    ind <- sample(seq_along(gr0), nr.genes)
    sum(gr0[ind] == 3)
}

## ----prey-expression-perm2----------------------------------------------------
perms <- replicate(permgr0(gr0), 1000)
summary(perms)
(sum(perms >= 9346) + 1) / 1001

## ----prey-freq----------------------------------------------------------------
prey.freq <- sort(table(bp.293t$SymbolB), decreasing = TRUE)
preys <- names(prey.freq)
prey.freq <- as.vector(prey.freq)
names(prey.freq) <- preys
head(prey.freq)
summary(prey.freq)
hist(prey.freq, breaks = 50, main = "", xlab = "Number of PPIs", ylab = "Number of genes")

## -----------------------------------------------------------------------------
ind <- match(names(prey.freq), rowData(se)$SYMBOL)
rmeans <- rowMeans(assay(se, "rpkm")[ind, 1:3])
log.rmeans <- log2(rmeans + 0.5)
par(pch = 20)
plot( x = prey.freq,
      y = log.rmeans,
      xlab = "prey frequency",
      ylab = "log2 RPKM")
cor(prey.freq, 
    log.rmeans,
    use = "pairwise.complete.obs")

## ----bp.prot------------------------------------------------------------------
bp.prot <- getBioplexProteome()
bp.prot
rowData(bp.prot)

## -----------------------------------------------------------------------------
rowSums(assay(bp.prot)[1:5,]) 

## -----------------------------------------------------------------------------
ratio <- rowMeans(assay(bp.prot)[1:5, 1:5]) / rowMeans(assay(bp.prot)[1:5, 6:10])
log2(ratio)

## -----------------------------------------------------------------------------
t.test(assay(bp.prot)[1, 1:5], assay(bp.prot)[1, 6:10])

## ----sessionInfo--------------------------------------------------------------
sessionInfo()