## ---- eval=TRUE, message=FALSE, warning=FALSE, echo=FALSE------------------ library(COCOA) library(ggplot2) data("brcaPCScores657") ggplot(data = brcaPCScores657, mapping = aes(x=PC1, y=PC4, col=ER_Status)) + geom_point() + ggtitle("PCA of DNA methylation data from breast cancer patients") + theme_classic() ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ library(COCOA) library(data.table) library(ggplot2) data("esr1_chr1") data("gata3_chr1") data("nrf1_chr1") data("atf3_chr1") data("brcaLoadings1") data("brcaMCoord1") data("brcaPCScores") data("brcaMethylData1") ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ # prepare data GRList <- GRangesList(esr1_chr1, gata3_chr1, nrf1_chr1, atf3_chr1) regionSetName <- c("esr1_chr1", "gata3_chr1", "nrf1_chr1", "atf3_chr1") ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ PCsToAnnotate <- paste0("PC", 1:4) regionSetScores <- runCOCOA(loadingMat=brcaLoadings1, signalCoord=brcaMCoord1, GRList=GRList, PCsToAnnotate=PCsToAnnotate, scoringMetric="regionMean") regionSetScores$regionSetName <- regionSetName ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ rsScoreHeatmap(regionSetScores, PCsToAnnotate=paste0("PC", 1:4), rsNameCol = "regionSetName", orderByPC = "PC1", column_title = "Region sets ordered by score for PC1") ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ rsScoreHeatmap(regionSetScores, PCsToAnnotate=paste0("PC", 1:4), rsNameCol = "regionSetName", orderByPC = "PC2", column_title = "Region sets ordered by score for PC2") ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ wideGRList <- lapply(GRList, resize, width=14000, fix="center") loadProfile <- lapply(wideGRList, function(x) getLoadingProfile(loadingMat=brcaLoadings1, signalCoord=brcaMCoord1, regionSet=x, PCsToAnnotate=PCsToAnnotate, binNum=21)) ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ # average loading value from each PC to normalize so PCs can be compared with each other avLoad <- apply(X=brcaLoadings1[, PCsToAnnotate], MARGIN=2, FUN=function(x) mean(abs(x))) # normalize loadProfile <- lapply(loadProfile, FUN=function(x) as.data.frame(mapply(FUN = function(y, z) x[, y] - z, y=PCsToAnnotate, z=avLoad))) binID = 1:nrow(loadProfile[[1]]) loadProfile <- lapply(loadProfile, FUN=function(x) cbind(binID, x)) # for the plot scale maxVal <- max(sapply(loadProfile, FUN=function(x) max(x[, PCsToAnnotate]))) minVal <- min(sapply(loadProfile, FUN=function(x) min(x[, PCsToAnnotate]))) # convert to long format for plots loadProfile <- lapply(X=loadProfile, FUN=function(x) tidyr::gather(data=x, key="PC", value="loading_value", PCsToAnnotate)) loadProfile <- lapply(loadProfile, function(x){x$PC <- factor(x$PC, levels=PCsToAnnotate); return(x)}) ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ wrapper <- function(x, ...) paste(strwrap(x, ...), collapse="\n") profilePList <- list() for (i in seq_along(loadProfile)) { thisRS <- loadProfile[[i]] profilePList[[i]] <- ggplot(data=thisRS, mapping=aes(x=binID , y=loading_value)) + geom_line() + ylim(c(minVal, maxVal)) + facet_wrap(facets="PC") + ggtitle(label=wrapper(regionSetName[i], width=30)) + xlab("Genome around region set, 14 kb") + ylab("Normalized loading value") + theme(panel.grid.major.x=element_blank(), panel.grid.minor.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) profilePList[[i]] } profilePList[[1]] profilePList[[2]] profilePList[[3]] profilePList[[4]] ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ signalAlongPC(genomicSignal=brcaMethylData1, signalCoord=brcaMCoord1, regionSet=esr1_chr1, pcScores=brcaPCScores, orderByPC="PC1", cluster_columns=TRUE, column_title = "Individual cytosine/CpG", name = "DNA methylation level") ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ signalAlongPC(genomicSignal=brcaMethylData1, signalCoord=brcaMCoord1, regionSet=nrf1_chr1, pcScores=brcaPCScores, orderByPC="PC1", cluster_columns=TRUE, column_title = "Individual cytosine/CpG", name = "DNA methylation level") ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ regionQuantileByPC(loadingMat = brcaLoadings1, signalCoord = brcaMCoord1, regionSet = esr1_chr1, rsName = "Estrogen receptor (chr1)", PCsToAnnotate=paste0("PC", 1:4), maxRegionsToPlot = 8000, cluster_rows = TRUE, cluster_columns = FALSE, column_title = rsName, name = "Percentile of loading scores in PC") ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ regionQuantileByPC(loadingMat = brcaLoadings1, signalCoord = brcaMCoord1, regionSet = nrf1_chr1, rsName = "Nrf1 (chr1)", PCsToAnnotate=paste0("PC", 1:4), maxRegionsToPlot = 8000, cluster_rows = TRUE, cluster_columns = FALSE, column_title = rsName, name = "Percentile of loading scores in PC") ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ toyData = data.frame(F1 = 1:5, F2 = c(2, 0, 6, 4, 3), F3 = c(1, 3, 2, 10, 5)) ## ---- eval=TRUE, message=FALSE, warning=FALSE------------------------------ prcomp(toyData)$rotation ## ---- eval=FALSE, message=FALSE, warning=FALSE----------------------------- # PC1 = 0.319 * F1 + 0.201 * F2 + 0.926 * F3 # PC2 = -0.178 * F1 + -0.947 * F2 + 0.267 * F3 # PC3 = -0.931 * F1 + 0.250 * F2 + 0.267 * F3