## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ----------------------------------------------------------------------------- suppressPackageStartupMessages({ library(PhosR) }) ## ----------------------------------------------------------------------------- data("phospho.cells.Ins.sample") grps = gsub("_[0-9]{1}", "", colnames(phospho.cells.Ins)) ## ----------------------------------------------------------------------------- # FL38B gsub("Intensity.", "", grps)[1:12] # Hepa1 gsub("Intensity.", "", grps)[13:24] ## ----------------------------------------------------------------------------- dim(phospho.cells.Ins) ## ----------------------------------------------------------------------------- phospho.cells.Ins.filtered <- selectGrps(phospho.cells.Ins, grps, 0.5, n=1) dim(phospho.cells.Ins.filtered) ## ----------------------------------------------------------------------------- # In cases where you have fewer replicates ( e.g.,triplicates), you may want to # select phosphosites quantified in 70% of replicates. phospho.cells.Ins.filtered1 <- selectGrps(phospho.cells.Ins, grps, 0.7, n=1) dim(phospho.cells.Ins.filtered1) ## ----------------------------------------------------------------------------- set.seed(123) phospho.cells.Ins.impute1 <- scImpute(phospho.cells.Ins.filtered, 0.5, grps)[,colnames(phospho.cells.Ins.filtered)] ## ----------------------------------------------------------------------------- set.seed(123) phospho.cells.Ins.impute <- phospho.cells.Ins.impute1 phospho.cells.Ins.impute[,1:5] <- ptImpute(phospho.cells.Ins.impute1[,6:10], phospho.cells.Ins.impute1[,1:5], percent1 = 0.6, percent2 = 0, paired = FALSE) phospho.cells.Ins.impute[,11:15] <- ptImpute(phospho.cells.Ins.impute1[,16:20], phospho.cells.Ins.impute1[,11:15], percent1 = 0.6, percent2 = 0, paired = FALSE) ## ----------------------------------------------------------------------------- phospho.cells.Ins.ms <- medianScaling(phospho.cells.Ins.impute, scale = FALSE) ## ----------------------------------------------------------------------------- cols <- rep(c("#ED4024", "#7FBF42", "#3F61AD", "#9B822F"), each=6) par(mfrow=c(1,2)) plotQC(phospho.cells.Ins.filtered, labels=colnames(phospho.cells.Ins.filtered), panel = 1, cols = cols) plotQC(phospho.cells.Ins.ms, labels=colnames(phospho.cells.Ins.ms), panel = 1, cols = cols) ## ----------------------------------------------------------------------------- par(mfrow=c(1,2)) plotQC(phospho.cells.Ins.filtered, labels=colnames(phospho.cells.Ins.filtered), panel = 2, cols = cols) plotQC(phospho.cells.Ins.ms, labels=colnames(phospho.cells.Ins.ms), panel = 2, cols = cols) ## ----------------------------------------------------------------------------- data("phospho_L6_ratio") data("SPSs") ## ----------------------------------------------------------------------------- colnames(phospho.L6.ratio)[grepl("AICAR_", colnames(phospho.L6.ratio))] colnames(phospho.L6.ratio)[grepl("^Ins_", colnames(phospho.L6.ratio))] colnames(phospho.L6.ratio)[grepl("AICARIns_", colnames(phospho.L6.ratio))] ## ----------------------------------------------------------------------------- dim(phospho.L6.ratio) ## ----------------------------------------------------------------------------- sum(is.na(phospho.L6.ratio)) ## ----------------------------------------------------------------------------- # Cleaning phosphosite label phospho.site.names = rownames(phospho.L6.ratio) head(phospho.site.names) ## ----------------------------------------------------------------------------- L6.sites = gsub(" ", "", sapply(strsplit(rownames(phospho.L6.ratio), "~"), function(x){paste(toupper(x[2]), x[3], "", sep=";")})) phospho.L6.ratio = t(sapply(split(data.frame(phospho.L6.ratio), L6.sites), colMeans)) head(rownames(phospho.L6.ratio)) phospho.site.names = split(phospho.site.names, L6.sites) ## ----------------------------------------------------------------------------- # take the grouping information grps = gsub("_.+", "", colnames(phospho.L6.ratio)) grps ## ----------------------------------------------------------------------------- cs = rainbow(length(unique(grps))) colorCodes = sapply(grps, switch, AICAR=cs[1], Ins=cs[2], AICARIns=cs[3]) par(mfrow=c(1,1)) plotQC(phospho.L6.ratio, panel = 2, cols=colorCodes, main = "Before batch correction") ## ----------------------------------------------------------------------------- par(mfrow=c(1,1)) plotQC(phospho.L6.ratio, cols=colorCodes, labels = colnames(phospho.L6.ratio), panel = 4, ylim=c(-20, 20), xlim=c(-30, 30), main = "Before batch correction") ## ----------------------------------------------------------------------------- design = model.matrix(~ grps - 1) design ## ----------------------------------------------------------------------------- # phosphoproteomics data normalisation and batch correction using RUV ctl = which(rownames(phospho.L6.ratio) %in% SPSs) phospho.L6.ratio.RUV = RUVphospho(phospho.L6.ratio, M = design, k = 3, ctl = ctl) ## ----------------------------------------------------------------------------- # plot after batch correction par(mfrow=c(1,2)) plotQC(phospho.L6.ratio, panel = 2, cols=colorCodes) plotQC(phospho.L6.ratio.RUV, cols=colorCodes, labels = colnames(phospho.L6.ratio), panel=2, ylim=c(-20, 20), xlim=c(-30, 30)) ## ----------------------------------------------------------------------------- par(mfrow=c(1,2)) plotQC(phospho.L6.ratio, panel = 4, cols=colorCodes, labels = colnames(phospho.L6.ratio), main="Before Batch correction") plotQC(phospho.L6.ratio.RUV, cols=colorCodes, labels = colnames(phospho.L6.ratio), panel=4, ylim=c(-20, 20), xlim=c(-30, 30), main="After Batch correction") ## ----------------------------------------------------------------------------- suppressPackageStartupMessages({ library(calibrate) library(limma) library(directPA) }) ## ----------------------------------------------------------------------------- data("PhosphoSitePlus") ## ----------------------------------------------------------------------------- # divides the phospho.L6.ratio data into groups by phosphosites L6.sites <- gsub(" ", "", gsub("~[STY]", "~", sapply(strsplit(rownames(phospho.L6.ratio.RUV), "~"), function(x){paste(toupper(x[2]), x[3], sep="~")}))) phospho.L6.ratio.sites <- t(sapply(split(data.frame(phospho.L6.ratio.RUV), L6.sites), colMeans)) # fit linear model for each phosphosite f <- gsub("_exp\\d", "", colnames(phospho.L6.ratio.RUV)) X <- model.matrix(~ f - 1) fit <- lmFit(phospho.L6.ratio.RUV, X) # extract top-ranked phosphosites for each condition compared to basal table.AICAR <- topTable(eBayes(fit), number=Inf, coef = 1) table.Ins <- topTable(eBayes(fit), number=Inf, coef = 3) table.AICARIns <- topTable(eBayes(fit), number=Inf, coef = 2) DE1.RUV <- c(sum(table.AICAR[,"adj.P.Val"] < 0.05), sum(table.Ins[,"adj.P.Val"] < 0.05), sum(table.AICARIns[,"adj.P.Val"] < 0.05)) # extract top-ranked phosphosites for each group comparison contrast.matrix1 <- makeContrasts(fAICARIns-fIns, levels=X) contrast.matrix2 <- makeContrasts(fAICARIns-fAICAR, levels=X) fit1 <- contrasts.fit(fit, contrast.matrix1) fit2 <- contrasts.fit(fit, contrast.matrix2) table.AICARInsVSIns <- topTable(eBayes(fit1), number=Inf) table.AICARInsVSAICAR <- topTable(eBayes(fit2), number=Inf) DE2.RUV <- c(sum(table.AICARInsVSIns[,"adj.P.Val"] < 0.05), sum(table.AICARInsVSAICAR[,"adj.P.Val"] < 0.05)) o <- rownames(table.AICARInsVSIns) Tc <- cbind(table.Ins[o,"logFC"], table.AICAR[o,"logFC"], table.AICARIns[o,"logFC"]) rownames(Tc) = gsub("(.*)(;[A-Z])([0-9]+)(;)", "\\1;\\3;", o) colnames(Tc) <- c("Ins", "AICAR", "AICAR+Ins") # summary phosphosite-level information to proteins for performing downstream # gene-centric analyses. Tc.gene <- phosCollapse(Tc, id=gsub(";.+", "", rownames(Tc)), stat=apply(abs(Tc), 1, max), by = "max") geneSet <- names(sort(Tc.gene[,1], decreasing = TRUE))[1:round(nrow(Tc.gene) * 0.1)] # 1D gene-centric pathway analysis path1 <- pathwayOverrepresent(geneSet, annotation=Pathways.reactome, universe = rownames(Tc.gene), alter = "greater") path2 <- pathwayRankBasedEnrichment(Tc.gene[,1], annotation=Pathways.reactome, alter = "greater") ## ----------------------------------------------------------------------------- lp1 <- -log10(as.numeric(path2[names(Pathways.reactome),1])) lp2 <- -log10(as.numeric(path1[names(Pathways.reactome),1])) plot(lp1, lp2, ylab="Overrepresentation (-log10 pvalue)", xlab="Rank-based enrichment (-log10 pvalue)", main="Comparison of 1D pathway analyses", xlim = c(0, 10)) # select highly enriched pathways sel <- which(lp1 > 1.5 & lp2 > 0.9) textxy(lp1[sel], lp2[sel], gsub("_", " ", gsub("REACTOME_", "", names(Pathways.reactome)))[sel]) ## ----------------------------------------------------------------------------- # 2D direction site-centric kinase activity analyses par(mfrow=c(1,2)) dpa1 <- directPA(Tc[,c(1,3)], direction=0, annotation=lapply(PhosphoSite.rat, function(x){gsub(";[STY]", ";", x)}), main="Direction pathway analysis") dpa2 <- directPA(Tc[,c(1,3)], direction=pi*7/4, annotation=lapply(PhosphoSite.rat, function(x){gsub(";[STY]", ";", x)}), main="Direction pathway analysis") # top activated kinases dpa1$pathways[1:5,] dpa2$pathways[1:5,] ## ----------------------------------------------------------------------------- z1 <- perturbPlot2d(Tc=Tc[,c(1,2)], annotation=lapply(PhosphoSite.rat, function(x){gsub(";[STY]", ";", x)}), cex=1, xlim=c(-2, 4), ylim=c(-2, 4), main="Kinase perturbation analysis") z2 <- perturbPlot2d(Tc=Tc[,c(1,3)], annotation=lapply(PhosphoSite.rat, function(x){gsub(";[STY]", ";", x)}), cex=1, xlim=c(-2, 4), ylim=c(-2, 4), main="Kinase perturbation analysis") z3 <- perturbPlot2d(Tc=Tc[,c(2,3)], annotation=lapply(PhosphoSite.rat, function(x){gsub(";[STY]", ";", x)}), cex=1, xlim=c(-2, 4), ylim=c(-2, 4), main="Kinase perturbation analysis") ## ----------------------------------------------------------------------------- suppressPackageStartupMessages({ library(parallel) library(ggplot2) library(ClueR) }) ## ----------------------------------------------------------------------------- data("phospho_liverInsTC_RUV_sample") ## ----------------------------------------------------------------------------- rownames(phospho.liver.Ins.TC.ratio.RUV) <- sapply(strsplit(rownames(phospho.liver.Ins.TC.ratio.RUV), "~"), function(x)paste(x[1], x[2], "", sep=";")) # take grouping information grps <- sapply(strsplit(colnames(phospho.liver.Ins.TC.ratio.RUV), "_"), function(x)x[3]) # select differentially phosphorylated sites sites.p <- matANOVA(phospho.liver.Ins.TC.ratio.RUV, grps) phospho.LiverInsTC <- meanAbundance(phospho.liver.Ins.TC.ratio.RUV, grps) sel <- which((sites.p < 0.05) & (rowSums(abs(phospho.LiverInsTC) > 1) != 0)) phospho.LiverInsTC.sel <- phospho.LiverInsTC[sel,] # summarise phosphosites information into gene level phospho.liverInsTC.gene <- phosCollapse(phospho.LiverInsTC.sel, gsub(";.+", "", rownames(phospho.LiverInsTC.sel)), stat = apply(abs(phospho.LiverInsTC.sel), 1, max), by = "max") # perform ClueR to identify optimal number of clusters RNGkind("L'Ecuyer-CMRG") set.seed(123) c1 <- runClue(phospho.liverInsTC.gene, annotation=Pathways.reactome, kRange = 2:10, rep = 5, effectiveSize = c(5, 100), pvalueCutoff = 0.05, alpha = 0.5) # Visualise the evaluation results data <- data.frame(Success=as.numeric(c1$evlMat), Freq=rep(2:10, each=5)) myplot <- ggplot(data, aes(x=Freq, y=Success)) + geom_boxplot(aes(x = factor(Freq), fill="gray")) + stat_smooth(method="loess", colour="red", size=3, span = 0.5) + xlab("# of cluster") + ylab("Enrichment score") + theme_classic() myplot set.seed(123) best <- clustOptimal(c1, rep=5, mfrow=c(2, 2), visualize = TRUE) # Finding enriched pathways from each cluster # ps <- sapply(best$enrichList, function(x){ # l <- ifelse(nrow(x) < 3, nrow(x), 3) # x[1:l,2] # }) # par(mfrow = c(1,1)) # barplot(-log10(as.numeric(unlist(ps)))) ## ----------------------------------------------------------------------------- RNGkind("L'Ecuyer-CMRG") set.seed(1) PhosphoSite.mouse2 = mapply(function(kinase) { gsub("(.*)(;[A-Z])([0-9]+;)", "\\1;\\3", kinase) }, PhosphoSite.mouse) # perform ClueR to identify optimal number of clusters c3 <- runClue(phospho.LiverInsTC.sel, annotation=PhosphoSite.mouse2, kRange = 2:10, rep = 5, effectiveSize = c(5, 100), pvalueCutoff = 0.05, alpha = 0.5) # Visualise the evaluation results data <- data.frame(Success=as.numeric(c3$evlMat), Freq=rep(2:10, each=5)) myplot <- ggplot(data, aes(x=Freq, y=Success)) + geom_boxplot(aes(x = factor(Freq), fill="gray")) + stat_smooth(method="loess", colour="red", size=3, span = 0.5) + xlab("# of cluster") + ylab("Enrichment score") + theme_classic() myplot set.seed(1) best <- clustOptimal(c3, rep=10, mfrow=c(2, 3), visualize = TRUE) # Finding enriched pathways from each cluster best$enrichList ## ----------------------------------------------------------------------------- suppressPackageStartupMessages({ library(dplyr) library(ggplot2) library(GGally) library(ggpubr) library(calibrate) }) ## ----------------------------------------------------------------------------- data("KinaseMotifs") data("KinaseFamily") ## ----------------------------------------------------------------------------- phosphoL6 = phospho.L6.ratio.RUV ## ----------------------------------------------------------------------------- rownames(phosphoL6) = phospho.site.names # filter for up-regulated phosphosites phosphoL6.mean <- meanAbundance(phosphoL6, grps = gsub("_.+", "", colnames(phosphoL6))) aov <- matANOVA(mat=phosphoL6, grps=gsub("_.+", "", colnames(phosphoL6))) phosphoL6.reg <- phosphoL6[(aov < 0.05) & (rowSums(phosphoL6.mean > 0.5) > 0), ,drop = FALSE] L6.phos.std <- standardise(phosphoL6.reg) rownames(L6.phos.std) <- sapply(strsplit(rownames(L6.phos.std), "~"), function(x){gsub(" ", "", paste(toupper(x[2]), x[3], "", sep=";"))}) ## ----------------------------------------------------------------------------- L6.phos.seq <- sapply(strsplit(rownames(phosphoL6.reg), "~"), function(x)x[4]) ## ----------------------------------------------------------------------------- L6.matrices <- kinaseSubstrateScore(PhosphoSite.mouse, L6.phos.std, L6.phos.seq, numMotif = 5, numSub = 1) set.seed(1) L6.predMat <- kinaseSubstratePred(L6.matrices, top=30) ## ----------------------------------------------------------------------------- kinaseOI = c("PRKAA1", "AKT1") Signalomes_results <- Signalomes(KSR=L6.matrices, predMatrix=L6.predMat, exprsMat=L6.phos.std, KOI=kinaseOI) ## ----------------------------------------------------------------------------- my_color_palette <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(8, "Accent")) kinase_all_color <- my_color_palette(ncol(L6.matrices$combinedScoreMatrix)) names(kinase_all_color) <- colnames(L6.matrices$combinedScoreMatrix) kinase_signalome_color <- kinase_all_color[colnames(L6.predMat)] dftoPlot_signalome <- stack(Signalomes_results$kinaseSubstrates) modules <- Signalomes_results$proteinModule names(modules) <- sapply(strsplit(as.character(names(Signalomes_results$proteinModules)), ";"), "[[", 1) dftoPlot_signalome$cluster <- modules[dftoPlot_signalome$values] dftoPlot_balloon_bycluster <- dftoPlot_signalome dftoPlot_balloon_bycluster <- na.omit(dftoPlot_balloon_bycluster) %>% dplyr::count(cluster, ind) dftoPlot_balloon_bycluster$ind <- as.factor(dftoPlot_balloon_bycluster$ind) dftoPlot_balloon_bycluster$cluster <- as.factor(dftoPlot_balloon_bycluster$cluster) dftoPlot_balloon_bycluster <- tidyr::spread(dftoPlot_balloon_bycluster, ind, n)[,-1] dftoPlot_balloon_bycluster[is.na(dftoPlot_balloon_bycluster)] <- 0 dftoPlot_balloon_bycluster <- do.call(rbind, lapply(1:nrow(dftoPlot_balloon_bycluster), function(x) { res <- sapply(dftoPlot_balloon_bycluster[x,], function(y) y/sum(dftoPlot_balloon_bycluster[x,])*100) })) dftoPlot_balloon_bycluster <- reshape2::melt(as.matrix(dftoPlot_balloon_bycluster)) colnames(dftoPlot_balloon_bycluster) <- c("cluster", "ind", "n") ggplot(dftoPlot_balloon_bycluster, aes(x = ind, y = cluster)) + geom_point(aes(col=ind, size=n)) + scale_color_manual(values=kinase_signalome_color) + scale_size_continuous(range = c(2, 17)) + theme_classic() + theme( aspect.ratio=0.25, legend.position = "bottom", axis.line = element_blank(), axis.title = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank()) ## ----------------------------------------------------------------------------- threskinaseNetwork = 0.9 signalomeKinase <- colnames(L6.predMat) kinase_cor <- stats::cor(L6.matrices$combinedScoreMatrix) cor_kinase_mat <- kinase_cor diag(cor_kinase_mat) <- 0 kinase_network <- lapply(1:ncol(cor_kinase_mat), function(x) names(which(cor_kinase_mat[,x] > threskinaseNetwork))) names(kinase_network) <- colnames(cor_kinase_mat) cor_kinase_mat <- apply(cor_kinase_mat, 2, function(x) x > threskinaseNetwork) cor_kinase_mat[cor_kinase_mat == FALSE] <- 0 cor_kinase_mat[cor_kinase_mat == TRUE] <- 1 library(network) links <- reshape2::melt(cor_kinase_mat) links <- links[links$value == 1,] res <- sapply(1:length(links$Var1), function(x) { kinase_cor[rownames(kinase_cor) == links$Var1[x], colnames(kinase_cor) == links$Var2[x]] }) links$cor <- res colnames(links) <- c("source", "target", "binary", "cor") network <- network::network(cor_kinase_mat, directed=FALSE) GGally::ggnet2(network, node.size=10, node.color=kinase_all_color, edge.size = 0.5, size = "degree", size.cut=3, label=colnames(cor_kinase_mat), label.size=2, mode="circle", label.color="black") ## ----------------------------------------------------------------------------- sessionInfo()