## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = TRUE, fig.width = 12, fig.height = 6.75 ) ## ----setup, message = F------------------------------------------------------- library(methodical) library(DESeq2) library(TumourMethData) ## ----eval=FALSE--------------------------------------------------------------- # # Installing Methodical # if (!require("BiocManager")) # install.packages("BiocManager") # BiocManager::install("methodical") ## ----------------------------------------------------------------------------- # Download mcrpc_wgbs_hg38 from TumourMethDatasets. mcrpc_wgbs_hg38_chr11 <- TumourMethData::download_meth_dataset(dataset = "mcrpc_wgbs_hg38_chr11") # Download transcript counts mcrpc_transcript_counts <- TumourMethData::download_rnaseq_dataset(dataset = "mcrpc_wgbs_hg38_chr11") ## ----------------------------------------------------------------------------- # Download Gencode annotation library(AnnotationHub) transcript_annotation <- AnnotationHub()[["AH75119"]] # Import the Gencode annotation and subset for transcript annotation for non-mitochondrial protein-coding genes transcript_annotation <- transcript_annotation[transcript_annotation$type == "transcript"] transcript_annotation <- transcript_annotation[transcript_annotation$transcript_type == "protein_coding"] # Remove transcript version from ID transcript_annotation$ID <- gsub("\\.[0-9]*", "", transcript_annotation$ID) # Filter for transcripts on chromosome 11 transcript_annotation_chr11 <- transcript_annotation[seqnames(transcript_annotation) == "chr11"] # Filter for transcripts in the counts table transcript_annotation_chr11 <- transcript_annotation_chr11[transcript_annotation_chr11$ID %in% row.names(mcrpc_transcript_counts)] # Set transcript ID as names of ranges names(transcript_annotation_chr11) <- transcript_annotation_chr11$ID # Get the TSS for each transcript set ID as names transcript_tss_chr11 <- resize(transcript_annotation_chr11, 1, fix = "start") rm(transcript_annotation) ## ----------------------------------------------------------------------------- # Subset mcrpc_transcript_counts for protein-coding transcripts mcrpc_transcript_counts <- mcrpc_transcript_counts[transcript_tss_chr11$ID, ] # Create a DESeqDataSet from Kallisto counts. mcrpc_transcript_counts_dds <- DESeqDataSetFromMatrix(countData = mcrpc_transcript_counts, colData = data.frame(sample = names(mcrpc_transcript_counts)), design = ~ 1) mcrpc_transcript_counts_dds <- estimateSizeFactors(mcrpc_transcript_counts_dds) mcrpc_transcript_counts_normalized <- data.frame(counts(mcrpc_transcript_counts_dds, normalized = TRUE)) ## ----------------------------------------------------------------------------- # Find samples with both WGBS and RNA-seq count data common_mcprc_samples <- intersect(names(mcrpc_transcript_counts), colnames(mcrpc_wgbs_hg38_chr11)) # Create a BPPARAM class BPPARAM = BiocParallel::bpparam() # Calculate CpG methylation-transcription correlations for all TSS on chromosome 11 system.time({transcript_cpg_meth_cors_mcrpc_samples_5kb <- calculateMethSiteTranscriptCors( meth_rse = mcrpc_wgbs_hg38_chr11, transcript_expression_table = mcrpc_transcript_counts_normalized, samples_subset = common_mcprc_samples, tss_gr = transcript_tss_chr11, cor_method = "pearson", max_sites_per_chunk = NULL, expand_upstream = 5000, expand_downstream = 5000, BPPARAM = BPPARAM)}) ## ----------------------------------------------------------------------------- # Extract correlation results for ENST00000398606 transcript gstp1_cpg_meth_transcript_cors <- transcript_cpg_meth_cors_mcrpc_samples_5kb[["ENST00000398606"]] # Examine first few rows of the results head(gstp1_cpg_meth_transcript_cors) # Extract TSS for the transcript attributes(gstp1_cpg_meth_transcript_cors)$tss_range ## ----fig.show='hide'---------------------------------------------------------- # Plot methylation-transcription correlation values for GSTP1 showing # chromosome 11 position on the x-axis meth_cor_plot_absolute_distance <- plotMethSiteCorCoefs( meth_site_cor_values = gstp1_cpg_meth_transcript_cors, reference_tss = FALSE, ylabel = "Correlation Value", title = "Correlation Between GSTP1 Transcription and CpG Methylation") print(meth_cor_plot_absolute_distance) # Plot methylation-transcription correlation values for GSTP1 showing # distance to the GSTP1 on the x-axis meth_cor_plot_relative_distance <- plotMethSiteCorCoefs( meth_site_cor_values = gstp1_cpg_meth_transcript_cors, reference_tss = TRUE, ylabel = "Correlation Value", title = "Correlation Between GSTP1 Transcription and CpG Methylation") print(meth_cor_plot_relative_distance) ## ----fig.show='hide'---------------------------------------------------------- # Add a dashed line to meth_cor_plot_relative_distance where the correlation is 0 meth_cor_plot_relative_distance + ggplot2::geom_hline(yintercept = 0, linetype = "dashed") ## ----fig.show='hide'---------------------------------------------------------- # Calculate gene body methylation-transcription correlations for all TSS on chromosome 11 system.time({transcript_body_meth_cors_mcrpc_samples <- calculateRegionMethylationTranscriptCors( meth_rse = mcrpc_wgbs_hg38_chr11, transcript_expression_table = mcrpc_transcript_counts_normalized, samples_subset = common_mcprc_samples, genomic_regions = transcript_annotation_chr11, genomic_region_transcripts = transcript_annotation_chr11$ID, region_methylation_summary_function = colMeans, cor_method = "pearson", BPPARAM = BPPARAM)}) # Show the top of the results table head(transcript_body_meth_cors_mcrpc_samples) # We'll filter for significant correlation values and plot their distribution transcript_body_meth_cors_significant <- dplyr::filter(transcript_body_meth_cors_mcrpc_samples, q_val < 0.05) ggplot(transcript_body_meth_cors_significant, aes(x = cor)) + geom_histogram() + theme_bw() + scale_y_continuous(expand = expansion(mult = c(0, 0.2), add = 0)) + labs(x = "Correlation Values", y = "Number of Significant Correlation Values") ## ----------------------------------------------------------------------------- sessionInfo()