--- output: html_document: self_contained: true number_sections: no theme: flatly highlight: tango mathjax: null toc: true toc_float: true toc_depth: 2 css: style.css fontsize: 11pt bibliography: bibliography.bib vignette: > %\VignetteIndexEntry{"11 - ELMER: Use case"} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} ---
# Introduction This section contains the complete ELMER code for the following analysis: * Vignette example * BRCA Supervised analysis * BRCA Unsupervised analysis # Vignette example Below is the complete code that was explained in the other sections. ```{r, eval=FALSE} library(MultiAssayExperiment) library(ELMER.data) library(ELMER) # get distal probes that are 2kb away from TSS on chromosome 1 distal.probes <- get.feature.probe(genome = "hg19", met.platform = "450K", rm.chr = paste0("chr",c(2:22,"X","Y"))) data(LUSC_RNA_refined,package = "ELMER.data") # GeneExp data(LUSC_meth_refined,package = "ELMER.data") # Meth mae <- createMAE(exp = GeneExp, met = Meth, save = TRUE, linearize.exp = TRUE, save.filename = "mae.rda", filter.probes = distal.probes, met.platform = "450K", genome = "hg19", TCGA = TRUE) group.col <- "definition" group1 <- "Primary solid Tumor" group2 <- "Solid Tissue Normal" dir.out <- "result" diff.dir <- "hypo" # Search for hypomethylated probes in group 1 sig.diff <- get.diff.meth(data = mae, group.col = group.col, group1 = group1, group2 = group2, minSubgroupFrac = 0.2, sig.dif = 0.3, diff.dir = diff.dir, cores = 1, dir.out = dir.out, pvalue = 0.01) nearGenes <- GetNearGenes(data = mae, probes = sig.diff$probe, numFlankingGenes = 20, # 10 upstream and 10 dowstream genes cores = 1) pair <- get.pair(data = mae, group.col = group.col, group1 = group1, mode = "unsupervised", group2 = group2, nearGenes = nearGenes, diff.dir = diff.dir, minSubgroupFrac = 0.4, # % of samples to use in to create groups U/M permu.dir = file.path(dir.out,"permu"), permu.size = 100, # Please set to 100000 to get significant results raw.pvalue = 0.05, Pe = 0.01, # Please set to 0.001 to get significant results filter.probes = TRUE, # See preAssociationProbeFiltering function filter.percentage = 0.05, filter.portion = 0.3, dir.out = dir.out, cores = 1, label = diff.dir) # Identify enriched motif for significantly hypomethylated probes which # have putative target genes. enriched.motif <- get.enriched.motif(data = mae, probes = pair$Probe, dir.out = dir.out, label = diff.dir, min.incidence = 10, lower.OR = 1.1) TF <- get.TFs(data = mae, mode = "unsupervised", group.col = group.col, group1 = group1, group2 = group2, enriched.motif = enriched.motif, dir.out = dir.out, cores = 1, label = diff.dir) ``` # BRCA Supervised analysis ```{r, eval=FALSE} library(stringr) library(TCGAbiolinks) library(dplyr) library(ELMER) library(MultiAssayExperiment) library(parallel) library(readr) dir.create("~/paper_elmer/",showWarnings = FALSE) setwd("~/paper_elmer/") file <- "mae_BRCA_hg38_450K_no_ffpe.rda" if(file.exists(file)) { mae <- get(load(file)) } else { getTCGA(disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc) basedir = "DATA", # Where data will be downloaded genome = "hg38") # Genome of refenrece "hg38" or "hg19" distal.probes <- get.feature.probe(feature = NULL, genome = "hg38", met.platform = "450K") mae <- createMAE(exp = "~/paper_elmer/Data/BRCA/BRCA_RNA_hg38.rda", met = "~/paper_elmer/Data/BRCA/BRCA_meth_hg38.rda", met.platform = "450K", genome = "hg38", linearize.exp = TRUE, filter.probes = distal.probes, met.na.cut = 0.2, save = FALSE, TCGA = TRUE) # Remove FFPE samples from the analysis mae <- mae[,!mae$is_ffpe] # Get molecular subytpe information from cell paper and more metadata (purity etc...) # https://doi.org/10.1016/j.cell.2015.09.033 file <- "http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx" downloader::download(file, basename(file)) subtypes <- readxl::read_excel(basename(file), skip = 2) subtypes$sample <- substr(subtypes$Methylation,1,16) meta.data <- merge(colData(mae),subtypes,by = "sample",all.x = T) meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),] meta.data <- S4Vectors::DataFrame(meta.data) rownames(meta.data) <- meta.data$sample stopifnot(all(meta.data$patient == colData(mae)$patient)) colData(mae) <- meta.data save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda") } dir.out <- "BRCA_unsupervised_hg38/hypo" cores <- 10 diff.probes <- get.diff.meth(data = mae, group.col = "definition", group1 = "Primary solid Tumor", group2 = "Solid Tissue Normal", diff.dir = "hypo", # Get probes hypometh. in group 1 cores = cores, minSubgroupFrac = 0.2, # % group samples used. pvalue = 0.01, sig.dif = 0.3, dir.out = dir.out, save = TRUE) # For each differently methylated probes we will get the # 20 nearby genes (10 downstream and 10 upstream) nearGenes <- GetNearGenes(data = mae, probes = diff.probes$probe, numFlankingGenes = 20, cores = cores) # This step is the most time consuming. Depending on the size of the groups # and the number of probes found previously it migh take hours Hypo.pair <- get.pair(data = mae, nearGenes = nearGenes, group.col = "definition", group1 = "Primary solid Tumor", group2 = "Solid Tissue Normal", permu.dir = paste0(dir.out,"/permu"), permu.size = 10000, mode = "unsupervised", minSubgroupFrac = 0.4, # 40% of samples to create U and M raw.pvalue = 0.001, Pe = 0.001, filter.probes = TRUE, filter.percentage = 0.05, filter.portion = 0.3, dir.out = dir.out, cores = cores, label = "hypo") # Number of pairs: 2950 enriched.motif <- get.enriched.motif(data = mae, min.motif.quality = "DS", probes = unique(Hypo.pair$Probe), dir.out = dir.out, label = "hypo", min.incidence = 10, lower.OR = 1.1) TF <- get.TFs(data = mae, group.col = "definition", group1 = "Primary solid Tumor", group2 = "Solid Tissue Normal", minSubgroupFrac = 0.4, # Set to 1 if supervised mode enriched.motif = enriched.motif, dir.out = dir.out, cores = cores, label = "hypo") ``` # BRCA Unsupervised analysis ```{r, eval=FALSE} library(stringr) library(TCGAbiolinks) library(dplyr) library(ELMER) library(MultiAssayExperiment) library(parallel) library(readr) #----------------------------------- # 1 - Samples # ---------------------------------- dir.create("~/paper_elmer/",showWarnings = FALSE) setwd("~/paper_elmer/") file <- "mae_BRCA_hg38_450K_no_ffpe.rda" if(file.exists(file)) { mae <- get(load(file)) } else { getTCGA(disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc) basedir = "DATA", # Where data will be downloaded genome = "hg38") # Genome of refenrece "hg38" or "hg19" distal.probes <- get.feature.probe(feature = NULL, genome = "hg38", met.platform = "450K") mae <- createMAE(exp = "DATA/BRCA/BRCA_RNA_hg38.rda", met = "DATA/BRCA/BRCA_meth_hg38.rda", met.platform = "450K", genome = "hg38", linearize.exp = TRUE, filter.probes = distal.probes, met.na.cut = 0.2, save = FALSE, TCGA = TRUE) # Remove FFPE samples from the analysis mae <- mae[,!mae$is_ffpe] # Get molecular subytpe information from cell paper and more metadata (purity etc...) # https://doi.org/10.1016/j.cell.2015.09.033 file <- "http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx" downloader::download(file, basename(file)) subtypes <- readxl::read_excel(basename(file), skip = 2) subtypes$sample <- substr(subtypes$Methylation,1,16) meta.data <- merge(colData(mae),subtypes,by = "sample",all.x = T) meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),] meta.data <- S4Vectors::DataFrame(meta.data) rownames(meta.data) <- meta.data$sample stopifnot(all(meta.data$patient == colData(mae)$patient)) colData(mae) <- meta.data save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda") } cores <- 6 direction <- c( "hypo","hyper") genome <- "hg38" group.col <- "PAM50" groups <- t(combn(na.omit(unique(colData(mae)[,group.col])),2)) for(g in 1:nrow(groups)) { group1 <- groups[g,1] group2 <- groups[g,2] for (j in direction){ tryCatch({ message("Analysing probes ",j, "methylated in ", group1, " vs ", group2) dir.out <- paste0("BRCA_supervised_",genome,"/",group1,"_",group2,"/",j) dir.create(dir.out, recursive = TRUE) #-------------------------------------- # STEP 3: Analysis | #-------------------------------------- # Step 3.1: Get diff methylated probes | #-------------------------------------- Sig.probes <- get.diff.meth(data = mae, group.col = group.col, group1 = group1, group2 = group2, sig.dif = 0.3, minSubgroupFrac = 1, cores = cores, dir.out = dir.out, diff.dir = j, pvalue = 0.01) if(nrow(Sig.probes) == 0) next #------------------------------------------------------------- # Step 3.2: Identify significant probe-gene pairs | #------------------------------------------------------------- # Collect nearby 20 genes for Sig.probes nearGenes <- GetNearGenes(data = mae, probe = Sig.probes$probe, cores = cores) pair <- get.pair(data = mae, nearGenes = nearGenes, group.col = group.col, group1 = group1, group2 = group2, permu.dir = paste0(dir.out,"/permu"), dir.out = dir.out, mode = "supervised", diff.dir = j, cores = cores, label = j, permu.size = 10000, raw.pvalue = 0.001) Sig.probes.paired <- read.csv(paste0(dir.out, "/getPair.",j, ".pairs.significant.csv"), stringsAsFactors=FALSE)[,1] #------------------------------------------------------------- # Step 3.3: Motif enrichment analysis on the selected probes | #------------------------------------------------------------- if(length(Sig.probes.paired) > 0 ){ #------------------------------------------------------------- # Step 3.3: Motif enrichment analysis on the selected probes | #------------------------------------------------------------- enriched.motif <- get.enriched.motif(probes = Sig.probes.paired, dir.out = dir.out, data = mae, label = j, plot.title = paste0("BRCA: OR for paired probes ", j, "methylated in ", group1, " vs ",group2)) motif.enrichment <- read.csv(paste0(dir.out, "/getMotif.",j, ".motif.enrichment.csv"), stringsAsFactors=FALSE) if(length(enriched.motif) > 0){ #------------------------------------------------------------- # Step 3.4: Identifying regulatory TFs | #------------------------------------------------------------- print("get.TFs") TF <- get.TFs(data = mae, enriched.motif = enriched.motif, dir.out = dir.out, mode = "supervised", group.col = group.col, group1 = group1, diff.dir = j, group2 = group2, cores = cores, label = j) TF.meth.cor <- get(load(paste0(dir.out, "/getTF.",j, ".TFs.with.motif.pvalue.rda"))) save(mae,TF, enriched.motif, Sig.probes.paired, pair, nearGenes, Sig.probes, motif.enrichment, TF.meth.cor, file=paste0(dir.out,"/ELMER_results_",j,".rda")) } } }, error = function(e){ message(e) }) } } ```