## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", echo = TRUE ) ## ----eval=FALSE--------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly=TRUE)) # install.packages("BiocManager") # # BiocManager::install("regionalpcs") ## ----eval=FALSE--------------------------------------------------------------- # # install devtools package if not already installed # if (!requireNamespace("devtools", quietly=TRUE)) # install.packages("devtools") # # # download and install the regionalpcs package # devtools::install_github("tyeulalio/regionalpcs") ## ----load-packages, message=FALSE, warning=FALSE------------------------------ library(regionalpcs) library(GenomicRanges) library(tidyr) library(tibble) library(dplyr) library(minfiData) library(TxDb.Hsapiens.UCSC.hg19.knownGene) ## ----------------------------------------------------------------------------- # Load the MethylSet data data(MsetEx.sub) # Display the first few rows of the dataset for a preliminary view head(MsetEx.sub) # Extract methylation M-values from the MethylSet # M-values are logit-transformed beta-values and are often used in differential # methylation analysis for improved statistical performance. mvals <- getM(MsetEx.sub) # Display the extracted methylation beta values head(mvals) ## ----genomic-positions-------------------------------------------------------- # Map the methylation data to genomic coordinates using the mapToGenome # function. This creates a GenomicMethylSet (gset) which includes genomic # position information for each probe. gset <- mapToGenome(MsetEx.sub) # Display the GenomicMethylSet object to observe the structure and initial # entries. gset # Convert the genomic position data into a GRanges object, enabling genomic # range operations in subsequent analyses. # The GRanges object (cpg_gr) provides a versatile structure for handling # genomic coordinates in R/Bioconductor. cpg_gr <- granges(gset) # Display the GRanges object for a preliminary view of the genomic coordinates. cpg_gr ## ----------------------------------------------------------------------------- # Obtain promoter regions # The TxDb object 'txdb' facilitates the retrieval of transcript-based # genomic annotations. txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene # Extracting promoter regions with a defined upstream and downstream window. # This GRanges object 'promoters_gr' will be utilized to map and summarize # methylation data in promoter regions. promoters_gr <- suppressMessages(promoters(genes(txdb), upstream=1000, downstream=0)) # Display the GRanges object containing the genomic coordinates of promoter # regions. promoters_gr ## ----------------------------------------------------------------------------- # get the region map using the regionalpcs function region_map <- regionalpcs::create_region_map(cpg_gr=cpg_gr, genes_gr=promoters_gr) # Display the initial few rows of the region map. head(region_map) ## ----compute-regional-pcs----------------------------------------------------- # Display head of region map head(region_map) dim(region_map) # Compute regional PCs res <- compute_regional_pcs(meth=mvals, region_map=region_map, pc_method="gd") ## ----inspect-output----------------------------------------------------------- # Inspect the output list elements names(res) ## ----extract-regional-pcs----------------------------------------------------- # Extract regional PCs regional_pcs <- res$regional_pcs head(regional_pcs)[1:4] ## ----count-pcs-regions-------------------------------------------------------- # Count the number of unique gene regions and PCs regions <- data.frame(gene_pc = rownames(regional_pcs)) |> separate(gene_pc, into = c("gene", "pc"), sep = "-") head(regions) # number of genes that have been summarized length(unique(regions$gene)) # how many of each PC did we get table(regions$pc) ## ----marcenko-pasteur-method-------------------------------------------------- # Using Marcenko-Pasteur method mp_res <- compute_regional_pcs(mvals, region_map, pc_method = "mp") # select the regional pcs mp_regional_pcs <- mp_res$regional_pcs # separate the genes from the pc numbers mp_regions <- data.frame(gene_pc = rownames(mp_regional_pcs)) |> separate(gene_pc, into = c("gene", "pc"), sep = "-") head(mp_regions) # number of genes that have been summarized length(unique(mp_regions$gene)) # how many of each PC did we get table(mp_regions$pc) ## ----sessionInfo-------------------------------------------------------------- sessionInfo()