## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
    library(PhosR)
})

## -----------------------------------------------------------------------------
data("phospho.cells.Ins.sample")
grps = gsub("_[0-9]{1}", "", colnames(phospho.cells.Ins))

## -----------------------------------------------------------------------------
# FL38B
gsub("Intensity.", "", grps)[1:12]
# Hepa1
gsub("Intensity.", "", grps)[13:24]

## -----------------------------------------------------------------------------
dim(phospho.cells.Ins)

## -----------------------------------------------------------------------------
phospho.cells.Ins.filtered <- selectGrps(phospho.cells.Ins, grps, 0.5, n=1) 
dim(phospho.cells.Ins.filtered)

## -----------------------------------------------------------------------------
# In cases where you have fewer replicates ( e.g.,triplicates), you may want to 
# select phosphosites quantified in 70% of replicates. 
phospho.cells.Ins.filtered1 <- selectGrps(phospho.cells.Ins, grps, 0.7, n=1) 
dim(phospho.cells.Ins.filtered1)

## -----------------------------------------------------------------------------
set.seed(123)
phospho.cells.Ins.impute1 <- 
    scImpute(phospho.cells.Ins.filtered, 0.5, 
        grps)[,colnames(phospho.cells.Ins.filtered)]

## -----------------------------------------------------------------------------
set.seed(123)
phospho.cells.Ins.impute <- phospho.cells.Ins.impute1
phospho.cells.Ins.impute[,1:5] <- ptImpute(phospho.cells.Ins.impute1[,6:10], 
                                            phospho.cells.Ins.impute1[,1:5], 
                                            percent1 = 0.6, percent2 = 0, 
                                            paired = FALSE)
phospho.cells.Ins.impute[,11:15] <- ptImpute(phospho.cells.Ins.impute1[,16:20], 
                                            phospho.cells.Ins.impute1[,11:15], 
                                            percent1 = 0.6, percent2 = 0, 
                                            paired = FALSE)

## -----------------------------------------------------------------------------
phospho.cells.Ins.ms <- medianScaling(phospho.cells.Ins.impute, scale = FALSE)

## -----------------------------------------------------------------------------
cols <- rep(c("#ED4024", "#7FBF42", "#3F61AD", "#9B822F"), each=6)
par(mfrow=c(1,2))
plotQC(phospho.cells.Ins.filtered, labels=colnames(phospho.cells.Ins.filtered), 
        panel = 1, cols = cols)
plotQC(phospho.cells.Ins.ms, labels=colnames(phospho.cells.Ins.ms), panel = 1, 
        cols = cols)

## -----------------------------------------------------------------------------
par(mfrow=c(1,2))
plotQC(phospho.cells.Ins.filtered, labels=colnames(phospho.cells.Ins.filtered), 
        panel = 2, cols = cols)
plotQC(phospho.cells.Ins.ms, labels=colnames(phospho.cells.Ins.ms), panel = 2, 
        cols = cols)

## -----------------------------------------------------------------------------
data("phospho_L6_ratio")
data("SPSs")

## -----------------------------------------------------------------------------
colnames(phospho.L6.ratio)[grepl("AICAR_", colnames(phospho.L6.ratio))]
colnames(phospho.L6.ratio)[grepl("^Ins_", colnames(phospho.L6.ratio))]
colnames(phospho.L6.ratio)[grepl("AICARIns_", colnames(phospho.L6.ratio))]

## -----------------------------------------------------------------------------
dim(phospho.L6.ratio)

## -----------------------------------------------------------------------------
sum(is.na(phospho.L6.ratio))

## -----------------------------------------------------------------------------
# Cleaning phosphosite label
phospho.site.names = rownames(phospho.L6.ratio)
head(phospho.site.names)

## -----------------------------------------------------------------------------
L6.sites = gsub(" ", "", sapply(strsplit(rownames(phospho.L6.ratio), "~"),
                                function(x){paste(toupper(x[2]), x[3], "", 
                                                    sep=";")}))
phospho.L6.ratio = t(sapply(split(data.frame(phospho.L6.ratio), L6.sites), 
                            colMeans))
head(rownames(phospho.L6.ratio))
phospho.site.names = split(phospho.site.names, L6.sites)

## -----------------------------------------------------------------------------
# take the grouping information
grps = gsub("_.+", "", colnames(phospho.L6.ratio))
grps

## -----------------------------------------------------------------------------
cs = rainbow(length(unique(grps)))
colorCodes = sapply(grps, switch, AICAR=cs[1], Ins=cs[2], AICARIns=cs[3])

par(mfrow=c(1,1))
plotQC(phospho.L6.ratio, panel = 2, cols=colorCodes, 
        main = "Before batch correction")

## -----------------------------------------------------------------------------
par(mfrow=c(1,1))
plotQC(phospho.L6.ratio, cols=colorCodes, labels = colnames(phospho.L6.ratio), 
        panel = 4, ylim=c(-20, 20), xlim=c(-30, 30), 
        main = "Before batch correction")

## -----------------------------------------------------------------------------
design = model.matrix(~ grps - 1)
design

## -----------------------------------------------------------------------------
# phosphoproteomics data normalisation and batch correction using RUV
ctl = which(rownames(phospho.L6.ratio) %in% SPSs)
phospho.L6.ratio.RUV = RUVphospho(phospho.L6.ratio, M = design, k = 3, 
                                    ctl = ctl)

## -----------------------------------------------------------------------------
# plot after batch correction
par(mfrow=c(1,2))
plotQC(phospho.L6.ratio, panel = 2, cols=colorCodes)
plotQC(phospho.L6.ratio.RUV, cols=colorCodes, 
        labels = colnames(phospho.L6.ratio), panel=2, ylim=c(-20, 20), 
        xlim=c(-30, 30))

## -----------------------------------------------------------------------------
par(mfrow=c(1,2))
plotQC(phospho.L6.ratio, panel = 4, cols=colorCodes, 
        labels = colnames(phospho.L6.ratio), main="Before Batch correction")
plotQC(phospho.L6.ratio.RUV, cols=colorCodes, 
        labels = colnames(phospho.L6.ratio), panel=4, ylim=c(-20, 20), 
        xlim=c(-30, 30), main="After Batch correction")

## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
    library(calibrate)
    library(limma)
    library(directPA)
})

## -----------------------------------------------------------------------------
data("PhosphoSitePlus")

## -----------------------------------------------------------------------------
# divides the phospho.L6.ratio data into groups by phosphosites
L6.sites <- gsub(" ", "", gsub("~[STY]", "~", 
                                sapply(strsplit(rownames(phospho.L6.ratio.RUV), 
                                                "~"), 
                                    function(x){paste(toupper(x[2]), x[3], 
                                                        sep="~")})))
phospho.L6.ratio.sites <- t(sapply(split(data.frame(phospho.L6.ratio.RUV),
                                        L6.sites), colMeans))

# fit linear model for each phosphosite
f <- gsub("_exp\\d", "", colnames(phospho.L6.ratio.RUV))
X <- model.matrix(~ f - 1)
fit <- lmFit(phospho.L6.ratio.RUV, X)

# extract top-ranked phosphosites for each condition compared to basal
table.AICAR <- topTable(eBayes(fit), number=Inf, coef = 1)
table.Ins <- topTable(eBayes(fit), number=Inf, coef = 3)
table.AICARIns <- topTable(eBayes(fit), number=Inf, coef = 2)

DE1.RUV <- c(sum(table.AICAR[,"adj.P.Val"] < 0.05), 
            sum(table.Ins[,"adj.P.Val"] < 0.05), 
            sum(table.AICARIns[,"adj.P.Val"] < 0.05))

# extract top-ranked phosphosites for each group comparison
contrast.matrix1 <- makeContrasts(fAICARIns-fIns, levels=X)  
contrast.matrix2 <- makeContrasts(fAICARIns-fAICAR, levels=X)  
fit1 <- contrasts.fit(fit, contrast.matrix1)
fit2 <- contrasts.fit(fit, contrast.matrix2)
table.AICARInsVSIns <- topTable(eBayes(fit1), number=Inf)
table.AICARInsVSAICAR <- topTable(eBayes(fit2), number=Inf)

DE2.RUV <- c(sum(table.AICARInsVSIns[,"adj.P.Val"] < 0.05), 
            sum(table.AICARInsVSAICAR[,"adj.P.Val"] < 0.05))

o <- rownames(table.AICARInsVSIns)
Tc <- cbind(table.Ins[o,"logFC"], table.AICAR[o,"logFC"], 
            table.AICARIns[o,"logFC"])
rownames(Tc) = gsub("(.*)(;[A-Z])([0-9]+)(;)", "\\1;\\3;", o)
colnames(Tc) <- c("Ins", "AICAR", "AICAR+Ins")

# summary phosphosite-level information to proteins for performing downstream 
# gene-centric analyses.
Tc.gene <- phosCollapse(Tc, id=gsub(";.+", "", rownames(Tc)), 
                        stat=apply(abs(Tc), 1, max), by = "max")
geneSet <- names(sort(Tc.gene[,1], 
                        decreasing = TRUE))[1:round(nrow(Tc.gene) * 0.1)]

# 1D gene-centric pathway analysis
path1 <- pathwayOverrepresent(geneSet, annotation=Pathways.reactome, 
                                universe = rownames(Tc.gene), alter = "greater")
path2 <- pathwayRankBasedEnrichment(Tc.gene[,1], 
                                    annotation=Pathways.reactome, 
                                    alter = "greater")

## -----------------------------------------------------------------------------
lp1 <- -log10(as.numeric(path2[names(Pathways.reactome),1]))
lp2 <- -log10(as.numeric(path1[names(Pathways.reactome),1]))
plot(lp1, lp2, ylab="Overrepresentation (-log10 pvalue)", 
        xlab="Rank-based enrichment (-log10 pvalue)", 
        main="Comparison of 1D pathway analyses", xlim = c(0, 10))

# select highly enriched pathways
sel <- which(lp1 > 1.5 & lp2 > 0.9)
textxy(lp1[sel], lp2[sel], gsub("_", " ", gsub("REACTOME_", "", 
                                                names(Pathways.reactome)))[sel])

## -----------------------------------------------------------------------------
# 2D direction site-centric kinase activity analyses
par(mfrow=c(1,2))
dpa1 <- directPA(Tc[,c(1,3)], direction=0, 
                annotation=lapply(PhosphoSite.rat, 
                                    function(x){gsub(";[STY]", ";", x)}), 
                main="Direction pathway analysis")
dpa2 <- directPA(Tc[,c(1,3)], direction=pi*7/4, 
                annotation=lapply(PhosphoSite.rat, 
                                    function(x){gsub(";[STY]", ";", x)}), 
                main="Direction pathway analysis")

# top activated kinases
dpa1$pathways[1:5,]
dpa2$pathways[1:5,]


## -----------------------------------------------------------------------------
z1 <- perturbPlot2d(Tc=Tc[,c(1,2)], 
                    annotation=lapply(PhosphoSite.rat, 
                                    function(x){gsub(";[STY]", ";", x)}),
                    cex=1, xlim=c(-2, 4), ylim=c(-2, 4), 
                    main="Kinase perturbation analysis")
z2 <- perturbPlot2d(Tc=Tc[,c(1,3)], 
                    annotation=lapply(PhosphoSite.rat, 
                                    function(x){gsub(";[STY]", ";", x)}),
                    cex=1, xlim=c(-2, 4), ylim=c(-2, 4), 
                    main="Kinase perturbation analysis")
z3 <- perturbPlot2d(Tc=Tc[,c(2,3)], 
                    annotation=lapply(PhosphoSite.rat, 
                                    function(x){gsub(";[STY]", ";", x)}),      
                    cex=1, xlim=c(-2, 4), ylim=c(-2, 4), 
                    main="Kinase perturbation analysis")

## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
    library(parallel)
    library(ggplot2)
    library(ClueR)
})

## -----------------------------------------------------------------------------
data("phospho_liverInsTC_RUV_sample")

## -----------------------------------------------------------------------------
rownames(phospho.liver.Ins.TC.ratio.RUV) <- 
    sapply(strsplit(rownames(phospho.liver.Ins.TC.ratio.RUV), "~"), 
        function(x)paste(x[1], x[2], "", sep=";"))

# take grouping information
grps <- sapply(strsplit(colnames(phospho.liver.Ins.TC.ratio.RUV), "_"), 
            function(x)x[3])

# select differentially phosphorylated sites
sites.p <- matANOVA(phospho.liver.Ins.TC.ratio.RUV, grps)
phospho.LiverInsTC <- meanAbundance(phospho.liver.Ins.TC.ratio.RUV, grps)
sel <- which((sites.p < 0.05) & (rowSums(abs(phospho.LiverInsTC) > 1) != 0))
phospho.LiverInsTC.sel <- phospho.LiverInsTC[sel,]

# summarise phosphosites information into gene level
phospho.liverInsTC.gene <- 
    phosCollapse(phospho.LiverInsTC.sel, 
            gsub(";.+", "", rownames(phospho.LiverInsTC.sel)), 
                stat = apply(abs(phospho.LiverInsTC.sel), 1, max), by = "max")

# perform ClueR to identify optimal number of clusters
RNGkind("L'Ecuyer-CMRG")
set.seed(123)
c1 <- runClue(phospho.liverInsTC.gene, annotation=Pathways.reactome, 
            kRange = 2:10, rep = 5, effectiveSize = c(5, 100), 
            pvalueCutoff = 0.05, alpha = 0.5)

# Visualise the evaluation results
data <- data.frame(Success=as.numeric(c1$evlMat), Freq=rep(2:10, each=5))
myplot <- ggplot(data, aes(x=Freq, y=Success)) + 
    geom_boxplot(aes(x = factor(Freq), fill="gray")) +
    stat_smooth(method="loess", colour="red", size=3, span = 0.5) +
    xlab("# of cluster") + 
    ylab("Enrichment score") + 
    theme_classic()
myplot

set.seed(123)
best <- clustOptimal(c1, rep=5, mfrow=c(2, 2), visualize = TRUE)

# Finding enriched pathways from each cluster
# ps <- sapply(best$enrichList, function(x){
#   l <- ifelse(nrow(x) < 3, nrow(x), 3)
#   x[1:l,2]
# })
# par(mfrow = c(1,1))
# barplot(-log10(as.numeric(unlist(ps))))

## -----------------------------------------------------------------------------
RNGkind("L'Ecuyer-CMRG")
set.seed(1)
PhosphoSite.mouse2 = mapply(function(kinase) {
    gsub("(.*)(;[A-Z])([0-9]+;)", "\\1;\\3", kinase)
}, PhosphoSite.mouse)

# perform ClueR to identify optimal number of clusters
c3 <- runClue(phospho.LiverInsTC.sel, annotation=PhosphoSite.mouse2, 
            kRange = 2:10, rep = 5, effectiveSize = c(5, 100), 
            pvalueCutoff = 0.05, alpha = 0.5)

# Visualise the evaluation results
data <- data.frame(Success=as.numeric(c3$evlMat), Freq=rep(2:10, each=5))
myplot <- ggplot(data, aes(x=Freq, y=Success)) + 
    geom_boxplot(aes(x = factor(Freq), fill="gray")) +
    stat_smooth(method="loess", colour="red", size=3, span = 0.5) + 
    xlab("# of cluster") + 
    ylab("Enrichment score") + 
    theme_classic()
myplot

set.seed(1)
best <- clustOptimal(c3, rep=10, mfrow=c(2, 3), visualize = TRUE)

# Finding enriched pathways from each cluster
best$enrichList

## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
    library(dplyr)
    library(ggplot2)
    library(GGally)
    library(ggpubr)
    library(calibrate)
})

## -----------------------------------------------------------------------------
data("KinaseMotifs")
data("KinaseFamily")

## -----------------------------------------------------------------------------
phosphoL6 = phospho.L6.ratio.RUV

## -----------------------------------------------------------------------------
rownames(phosphoL6) = phospho.site.names

# filter for up-regulated phosphosites
phosphoL6.mean <- meanAbundance(phosphoL6, grps = gsub("_.+", "", 
                                                        colnames(phosphoL6)))
aov <- matANOVA(mat=phosphoL6, grps=gsub("_.+", "", colnames(phosphoL6)))
phosphoL6.reg <- phosphoL6[(aov < 0.05) & 
                            (rowSums(phosphoL6.mean > 0.5) > 0), ,drop = FALSE]
L6.phos.std <- standardise(phosphoL6.reg) 
rownames(L6.phos.std) <- 
    sapply(strsplit(rownames(L6.phos.std), "~"), 
        function(x){gsub(" ", "", paste(toupper(x[2]), x[3], "", sep=";"))})

## -----------------------------------------------------------------------------
L6.phos.seq <- sapply(strsplit(rownames(phosphoL6.reg), "~"), function(x)x[4])

## -----------------------------------------------------------------------------
L6.matrices <- kinaseSubstrateScore(PhosphoSite.mouse, L6.phos.std, 
                                    L6.phos.seq, numMotif = 5, numSub = 1)
set.seed(1)
L6.predMat <- kinaseSubstratePred(L6.matrices, top=30) 

## -----------------------------------------------------------------------------
kinaseOI = c("PRKAA1", "AKT1")

Signalomes_results <- Signalomes(KSR=L6.matrices, 
                                predMatrix=L6.predMat, 
                                exprsMat=L6.phos.std, 
                                KOI=kinaseOI)

## -----------------------------------------------------------------------------
my_color_palette <- 
    grDevices::colorRampPalette(RColorBrewer::brewer.pal(8, "Accent"))
kinase_all_color <- my_color_palette(ncol(L6.matrices$combinedScoreMatrix))
names(kinase_all_color) <- colnames(L6.matrices$combinedScoreMatrix)
kinase_signalome_color <- kinase_all_color[colnames(L6.predMat)]

dftoPlot_signalome <- stack(Signalomes_results$kinaseSubstrates)
modules <- Signalomes_results$proteinModule
names(modules) <- 
    sapply(strsplit(as.character(names(Signalomes_results$proteinModules)), 
                    ";"), "[[", 1)
dftoPlot_signalome$cluster <- modules[dftoPlot_signalome$values]

dftoPlot_balloon_bycluster <- dftoPlot_signalome
dftoPlot_balloon_bycluster <- na.omit(dftoPlot_balloon_bycluster) %>% 
    dplyr::count(cluster, ind)
dftoPlot_balloon_bycluster$ind <- as.factor(dftoPlot_balloon_bycluster$ind)
dftoPlot_balloon_bycluster$cluster <- 
    as.factor(dftoPlot_balloon_bycluster$cluster)
dftoPlot_balloon_bycluster <- 
    tidyr::spread(dftoPlot_balloon_bycluster, ind, n)[,-1]
dftoPlot_balloon_bycluster[is.na(dftoPlot_balloon_bycluster)] <- 0

dftoPlot_balloon_bycluster <- 
    do.call(rbind, lapply(1:nrow(dftoPlot_balloon_bycluster), function(x) {
        res <- sapply(dftoPlot_balloon_bycluster[x,], function(y) 
            y/sum(dftoPlot_balloon_bycluster[x,])*100)
}))

dftoPlot_balloon_bycluster <- 
    reshape2::melt(as.matrix(dftoPlot_balloon_bycluster))
colnames(dftoPlot_balloon_bycluster) <- c("cluster", "ind", "n")

ggplot(dftoPlot_balloon_bycluster, aes(x = ind, y = cluster)) + 
    geom_point(aes(col=ind, size=n)) + 
    scale_color_manual(values=kinase_signalome_color) + 
    scale_size_continuous(range = c(2, 17)) + 
    theme_classic() + 
    theme(
        aspect.ratio=0.25, 
        legend.position = "bottom",
        axis.line = element_blank(),            
        axis.title = element_blank(),   
        panel.grid.major.x = element_blank(),  
        panel.grid.minor.x = element_blank()) 

## -----------------------------------------------------------------------------
threskinaseNetwork = 0.9
signalomeKinase <- colnames(L6.predMat)
kinase_cor <- stats::cor(L6.matrices$combinedScoreMatrix)

cor_kinase_mat <- kinase_cor
diag(cor_kinase_mat) <- 0
kinase_network <- lapply(1:ncol(cor_kinase_mat), function(x) 
    names(which(cor_kinase_mat[,x] > threskinaseNetwork))) 
names(kinase_network) <- colnames(cor_kinase_mat)

cor_kinase_mat <- apply(cor_kinase_mat, 2, function(x) x  > threskinaseNetwork)
cor_kinase_mat[cor_kinase_mat == FALSE] <- 0
cor_kinase_mat[cor_kinase_mat == TRUE] <- 1

library(network)
links <- reshape2::melt(cor_kinase_mat)
links <- links[links$value == 1,]
res <- sapply(1:length(links$Var1), function(x) { 
    kinase_cor[rownames(kinase_cor) == links$Var1[x], 
        colnames(kinase_cor) == links$Var2[x]]
})
links$cor <- res
colnames(links) <- c("source", "target", "binary", "cor")

network <- network::network(cor_kinase_mat, directed=FALSE)
GGally::ggnet2(network, 
    node.size=10, 
    node.color=kinase_all_color, 
    edge.size = 0.5, 
    size = "degree",
    size.cut=3,
    label=colnames(cor_kinase_mat),
    label.size=2,
    mode="circle",
    label.color="black")

## -----------------------------------------------------------------------------
sessionInfo()