## ----setup, echo = FALSE------------------------------------------------------ knitr::opts_chunk$set(message = FALSE, crop = NULL) ## ----load-packages------------------------------------------------------------ library(motifTestR) library(extraChIPs) library(rtracklayer) library(BSgenome.Hsapiens.UCSC.hg19) library(parallel) library(ggplot2) library(patchwork) theme_set(theme_bw()) cores <- 1 ## ----echo = FALSE------------------------------------------------------------- data("ar_er_peaks") ## ----load-example-peaks------------------------------------------------------- data("ar_er_peaks") ar_er_peaks sq <- seqinfo(ar_er_peaks) ## ----test-seq----------------------------------------------------------------- test_seq <- getSeq(BSgenome.Hsapiens.UCSC.hg19, ar_er_peaks) names(test_seq) <- as.character(ar_er_peaks) ## ----ex-pwm------------------------------------------------------------------- data("ex_pwm") names(ex_pwm) ex_pwm$ESR1 ## ----match-esr1--------------------------------------------------------------- getPwmMatches(ex_pwm$ESR1, test_seq) ## ----best-match-esr1---------------------------------------------------------- getPwmMatches(ex_pwm$ESR1, test_seq, best_only = TRUE) ## ----bm-all------------------------------------------------------------------- bm_all <- getPwmMatches( ex_pwm, test_seq, best_only = TRUE, break_ties = "all", mc.cores = cores ) ## ----count-pwm-matches-------------------------------------------------------- countPwmMatches(ex_pwm, test_seq, mc.cores = cores) ## ----test-motif-pos----------------------------------------------------------- res_pos <- testMotifPos(bm_all, mc.cores = cores) head(res_pos) ## ----test-motif-pos-abs------------------------------------------------------- res_abs <- testMotifPos(bm_all, abs = TRUE, mc.cores = cores) head(res_abs) ## ----plot-pos, fig.cap = "Distribution of motif matches around the centres of the set of peaks"---- topMotifs <- rownames(res_pos)[1:4] A <- plotMatchPos(bm_all[topMotifs], binwidth = 10, se = FALSE) B <- plotMatchPos(bm_all[topMotifs], binwidth = 10, geom = "col") + geom_smooth(se = FALSE, show.legend = FALSE) + facet_wrap(~name) A + B + plot_annotation(tag_levels = "A") & theme(legend.position = "bottom") ## ----plot-abs-pos, fig.cap = "Distribution of motif matches shown as a distance from the centre of each sequence"---- topMotifs <- rownames(res_abs)[1:4] A <- plotMatchPos(bm_all[topMotifs], abs = TRUE, type = "heatmap") + scale_fill_viridis_c() B <- plotMatchPos( bm_all[topMotifs], abs = TRUE, type = "cdf", geom = "line", binwidth = 5 ) A + B + plot_annotation(tag_levels = "A") & theme(legend.position = "bottom") ## ----zr75-enh----------------------------------------------------------------- data("zr75_enh") mean(overlapsAny(ar_er_peaks, zr75_enh)) ## ----define-bg-ranges--------------------------------------------------------- ar_er_peaks$feature <- ifelse( overlapsAny(ar_er_peaks, zr75_enh), "enhancer", "other" ) chr1 <- GRanges(sq)[1] bg_ranges <- GRangesList( enhancer = zr75_enh, other = GenomicRanges::setdiff(chr1, zr75_enh) ) ## ----rm-ranges---------------------------------------------------------------- data("hg19_mask") set.seed(305) rm_ranges <- makeRMRanges( splitAsList(ar_er_peaks, ar_er_peaks$feature), bg_ranges, exclude = hg19_mask, n_iter = 100 ) ## ----rm-seq------------------------------------------------------------------- rm_seq <- getSeq(BSgenome.Hsapiens.UCSC.hg19, rm_ranges) mcols(rm_seq) <- mcols(rm_ranges) ## ----enrich-res--------------------------------------------------------------- enrich_res <- testMotifEnrich( ex_pwm, test_seq, rm_seq, model = "quasi", mc.cores = cores ) head(enrich_res) ## ----iter-res----------------------------------------------------------------- iter_res <- testMotifEnrich( ex_pwm, test_seq, rm_seq, mc.cores = cores, model = "iteration" ) head(iter_res) ## ----plot-overlaps, fig.cap = "Distribution of select PWM matches within sequences. Each sequence is only considered once and as such, match numbers may be below those returned during testing, which includes multiple matches within a sequence."---- topMotifs <- rownames(enrich_res)[1:4] ex_pwm[topMotifs] |> getPwmMatches(test_seq, mc.cores = cores) |> lapply(\(x) x$seq) |> plotOverlaps(type = "upset", min_size = 5) ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()