## ----setup-------------------------------------------------------------------- knitr::opts_chunk$set(message = FALSE, crop = NULL) ## ----load-packages, eval = TRUE----------------------------------------------- library(tidyverse) library(Rsamtools) library(csaw) library(BiocParallel) library(rtracklayer) library(edgeR) library(patchwork) library(extraChIPs) library(plyranges) library(scales) library(glue) library(here) library(magrittr) library(quantro) library(cqn) library(ggrepel) theme_set(theme_bw()) ## ----samples, eval = FALSE---------------------------------------------------- # samples <- here("data", "PRJNA509779.tsv") %>% # read_tsv() %>% # dplyr::filter(target == "H3K27ac") %>% # mutate(treatment = factor(treatment, levels = c("E2", "E2DHT"))) # accessions <- unique(c(samples$accession, samples$input)) # treat_colours <- c("steelblue", "red3", "grey") # names(treat_colours) <- c(levels(samples$treatment), "Input") ## ----bfl, eval = FALSE-------------------------------------------------------- # bfl <- here("data", "H3K27ac", glue("{accessions}.bam")) %>% # BamFileList() %>% # setNames(str_remove_all(names(.), ".bam")) # file.exists(path(bfl)) ## ----sq, eval = FALSE--------------------------------------------------------- # sq <- seqinfo(bfl) %>% # sortSeqlevels() %>% # keepStandardChromosomes() # isCircular(sq) <- rep(FALSE, length(seqlevels(sq))) # genome(sq) <- "GRCh37" ## ----wincounts, eval = FALSE-------------------------------------------------- # greylist <- here("data", "chr10_greylist.bed") %>% # import.bed(seqinfo = sq) # blacklist <- here("data", "chr10_blacklist.bed") %>% # import.bed(seqinfo = sq) # rp <- readParam( # pe = "none", # dedup = TRUE, # restrict = "chr10", # discard = c(greylist, blacklist) # ) # wincounts <- windowCounts( # bam.files = bfl, # spacing = 40, # width = 120, # ext = 200, # filter = length(bfl), # param = rp # ) # seqlevels(wincounts) <- seqlevels(sq) # seqinfo(wincounts) <- sq ## ----rowranges-wincounts, eval = FALSE---------------------------------------- # rowRanges(wincounts) ## ----coldata-wincounts, eval = FALSE------------------------------------------ # colData(wincounts) ## ----update-coldata, eval = FALSE--------------------------------------------- # colData(wincounts) <- colData(wincounts) %>% # as_tibble(rownames = "accession") %>% # left_join(samples, by = "accession") %>% # dplyr::select( # accession, all_of(colnames(colData(wincounts))), target, treatment # ) %>% # mutate( # target = str_replace_na(target, "Input"), # treatment = str_replace_na(treatment, "Input") %>% # as.factor() # ) %>% # DataFrame() %>% # set_rownames(.$accession) ## ----plot-densities, eval = FALSE--------------------------------------------- # plotAssayDensities(wincounts, colour = "treatment", trans = "log1p") + # theme_bw() + # scale_colour_manual(values = treat_colours) ## ----filter-counts, eval = FALSE---------------------------------------------- # guide_regions <- here("data", "H3K27ac", "H3K27ac_chr10.bed") %>% # import.bed(seqinfo = sq) # filtcounts <- dualFilter( # x = wincounts[, wincounts$target == "H3K27ac"], # bg = wincounts[, wincounts$target == "Input"], # ref = guide_regions, # q = 0.6 # ) ## ----plot-density-pca, eval = FALSE------------------------------------------- # A <- plotAssayDensities(filtcounts, assay = "logCPM", colour = "treatment") + # scale_colour_manual(values = treat_colours) + # theme_bw() # B <- plotAssayPCA(filtcounts, "logCPM", colour = "treatment", label = "accession") + # scale_colour_manual(values = treat_colours) + # theme_bw() # A + B + # plot_layout(guides = "collect") + plot_annotation(tag_levels = "A") ## ----add-dens-pca, echo = FALSE, eval = TRUE, fig.cap = "*A) Densities for logCPM values across all samples after discarding windows less likely to contain H3K27ac signal using `dualFilter()` B) PCA plot based on the logCPM assay*"---- knitr::include_graphics("differential_signal_sliding_files/figure-gfm/plot-density-pca-1.png") ## ----plot-assay-rle, eval = FALSE--------------------------------------------- # a <- plotAssayRle(filtcounts, assay = "logCPM", fill = "treat") + # geom_hline(yintercept = 0, linetype = 2, colour = "grey") + # scale_fill_manual(values = treat_colours) + # ggtitle("RLE: Across All Samples") + # theme_bw() # b <- plotAssayRle( # filtcounts, assay = "logCPM", fill = "treat", rle_group = "treat" # ) + # geom_hline(yintercept = 0, linetype = 2, colour = "grey") + # scale_fill_manual(values = treat_colours) + # ggtitle("RLE: Within Treatment Groups") + # theme_bw() # a + b + plot_layout(guides = "collect") + # plot_annotation(tag_levels = "A") ## ----add-rle-plot, eval = TRUE, echo = FALSE, fig.cap = "*RLE plots (A) across all samples and (B) with values calculated within treatment groups*"---- knitr::include_graphics("differential_signal_sliding_files/figure-gfm/plot-assay-rle-1.png") ## ----fit-gr, eval = FALSE----------------------------------------------------- # X <- model.matrix(~treatment, data = colData(filtcounts)) # fit_gr <- fitAssayDiff(filtcounts, design = X, fc = 1.2, asRanges = TRUE) ## ----results-gr, eval = FALSE------------------------------------------------- # results_gr <- mergeByHMP(fit_gr, inc_cols = "overlaps_ref", merge_within = 120) %>% # mutate( # status <- case_when( # hmp_fdr > 0.05 ~ "Unchanged", # logFC > 0 ~ "Increased", # logFC < 0 ~ "Decreased" # ) # ) # arrange(results_gr, hmp)[1:5] ## ----plot-ma-libsize, eval = FALSE-------------------------------------------- # A <- fit_gr %>% # as_tibble() %>% # ggplot(aes(logCPM, logFC)) + # geom_point(alpha = 0.6) + # geom_smooth(se = FALSE, method = "loess") + # geom_label( # aes(label = label), # data = . %>% # summarise( # n = dplyr::n(), # logCPM = max(logCPM) - 0.15 * diff(range(logCPM)), # logFC = max(logFC) - 0.05 * diff(range(logFC)), # label = glue("{comma(n)}\nSliding Windows") # ) # ) + # ylim(range(fit_gr$logFC)) + # ggtitle("MA Plot: All Retained Sliding Windows") # B <- results_gr %>% # as_tibble() %>% # mutate(`FDR < 0.05` = hmp_fdr < 0.05) %>% # ggplot(aes(logCPM, logFC)) + # geom_point(aes(colour = `FDR < 0.05`), alpha = 0.6) + # geom_smooth(se = FALSE, method = "loess") + # geom_label_repel( # aes(label = range, colour = `FDR < 0.05`), # data = . %>% dplyr::filter(hmp_fdr == min(hmp_fdr)), # show.legend = FALSE # ) + # geom_label( # aes(label = label), # data = . %>% # summarise( # n = dplyr::n(), # logCPM = max(logCPM) - 0.15 * diff(range(logCPM)), # logFC = max(fit_gr$logFC) - 0.05 * diff(range(fit_gr$logFC)), # label = glue("{comma(n)}\nMerged Windows") # ) # ) + # scale_colour_manual(values = c("black", "red")) + # ylim(range(fit_gr$logFC)) + # ggtitle("MA Plot: Merged Windows") # A + B + plot_annotation(tag_levels = "A") ## ----add-ma-plot, echo=FALSE, eval=TRUE, fig.cap="*MA Plots using A) all sliding windows (after filtering before merging) and B) the final set of merged windows. The most highly ranked merged window is also labelled in the set of merged windows. Blue lines represent a loess curve through the data.*"---- knitr::include_graphics("differential_signal_sliding_files/figure-gfm/plot-ma-libsize-1.png") ## ----qtest, eval = FALSE------------------------------------------------------ # set.seed(100) # qtest <- assay(filtcounts, "counts") %>% # quantro(groupFactor = filtcounts$treatment, B = 1e3) # qtest ## ----fit-tmm, eval = FALSE---------------------------------------------------- # tmm_gr <- fitAssayDiff( # filtcounts, design = X, fc = 1.2, norm = "TMM", asRanges = TRUE # ) # tmm_results <- mergeByHMP(tmm_gr, inc_cols = "overlaps_ref", merge_within = 120) %>% # mutate( # status = case_when( # hmp_fdr > 0.05 ~ "Unchanged", # logFC < 0 ~ "Decreased", # logFC > 0 ~ "Increased" # ) # ) # table(tmm_results$status) ## ----plot-ma-tmm, eval = FALSE------------------------------------------------ # tmm_gr %>% # as_tibble() %>% # ggplot(aes(logCPM, logFC)) + # geom_point(alpha = 0.6) + # geom_smooth(se = FALSE, method = "loess") + # ggtitle("MA Plot: TMM Normalisation") ## ----gencode, eval = FALSE---------------------------------------------------- # gencode <- here("data/gencode.v43lift37.chr10.annotation.gtf.gz") %>% # import.gff() %>% # filter_by_overlaps(GRanges("chr10:42354900-100000000")) %>% # split(.$type) # seqlevels(gencode) <- seqlevels(sq) # seqinfo(gencode) <- sq ## ----tss, eval = FALSE-------------------------------------------------------- # tss <- gencode$transcript %>% # resize(width = 1, fix = "start") %>% # select(gene_id, ends_with("name")) %>% # reduceMC(min.gapwidth = 0) ## ----subset-tss, eval = FALSE------------------------------------------------- # subset(tss, vapply(gene_name, function(x) "PPIF" %in% x, logical(1))) ## ----overlaps-any, eval = FALSE----------------------------------------------- # tmm_results$tss <- overlapsAny(tmm_results, tss) ## ----promoters, eval = FALSE-------------------------------------------------- # promoters <- gencode$transcript %>% # select(gene_id, ends_with("name")) %>% # promoters(upstream = 2500, downstream = 500) %>% # reduceMC(simplify = FALSE) # tail(promoters) ## ----map-by-feature, eval = FALSE--------------------------------------------- # tmm_results <- mapByFeature( # tmm_results, genes = gencode$gene, # prom = select(promoters, gene_id, gene_name) # ) # tmm_results %>% filter(hmp_fdr < 0.05) %>% arrange(hmp) ## ----plot-ma-mapped, eval = FALSE--------------------------------------------- # status_colours <- c(Unchanged = "grey70", Increased = "red3", Decreased = "royalblue") # tmm_results %>% # as_tibble() %>% # ggplot(aes(logCPM, logFC, colour = status, shape = tss)) + # geom_point() + # geom_label_repel( # aes(label = label), # data = . %>% # arrange(hmp) %>% # dplyr::slice(1:20) %>% # mutate( # label = vapply(gene_name, paste, character(1), collapse = "; ") %>% # str_trunc(30) # ), # fill = "white", alpha = 0.6, show.legend = FALSE # ) + # scale_colour_manual(values = status_colours) + # scale_shape_manual(values = c(19, 21)) ## ----add-ma-mapped-plot, eval = TRUE, echo = FALSE, fig.cap = "*MA-plot with the top 20 regions by significance labelled according to the most likely gene influenced by the detected signal. Regions which directly overlap a TSS are shown as empty circles*"---- knitr::include_graphics("differential_signal_sliding_files/figure-gfm/plot-ma-mapped-1.png") ## ----define-regions, eval = FALSE--------------------------------------------- # regions <- defineRegions( # genes = gencode$gene, transcripts = gencode$transcript, exons = gencode$exon # ) # regions ## ----bet-overlap, eval = FALSE------------------------------------------------ # tmm_results$bestOverlap <- bestOverlap(tmm_results, regions) %>% # factor(levels = names(regions)) # filter(tmm_results, hmp_fdr < 0.05, bestOverlap == "promoter") ## ----reg-as-factor, eval = FALSE---------------------------------------------- # reg_levels <- vapply(regions, function(x) x$region[1], character(1)) # tmm_results$bestOverlap <- tmm_results %>% # bestOverlap(unlist(regions), var = "region") %>% # factor(levels = reg_levels) ## ----reg-colours, eval = FALSE------------------------------------------------ # region_colours <- hcl.colors(length(reg_levels), "Viridis", rev = TRUE) # names(region_colours) <- reg_levels ## ----plot-pie-labels, eval = FALSE-------------------------------------------- # plotPie( # tmm_results, fill = "bestOverlap", min_p = 0.05, # cat_glue = "{.data[[fill]]}\n{n}\n({percent(p, 0.1)})" # ) + # scale_fill_manual(values = region_colours) ## ----add-pit, eval = TRUE, echo = FALSE, fig.cap = "*Pie chart showing customised labels. Here the regions have been modified to use title case in the labels, but without modifying the underlying data.*"---- knitr::include_graphics("differential_signal_sliding_files/figure-gfm/plot-pie-labels-1.png") ## ----plot-donut-default, eval = FALSE----------------------------------------- # plotSplitDonut( # tmm_results, inner = "bestOverlap", outer = "status", # inner_palette = region_colours # ) ## ----add-donut, eval = TRUE, echo=FALSE, fig.cap = "*Split-Donut chart showing status with overlapping region*"---- knitr::include_graphics("differential_signal_sliding_files/figure-gfm/plot-donut-default-1.png") ## ----plot-donut-customised, eval = FALSE-------------------------------------- # plotSplitDonut( # tmm_results, inner = "bestOverlap", outer = "status", # inner_palette = region_colours, outer_palette = status_colours, # inner_glue = "{str_replace_all(.data[[inner]], ' ', '\n')}\nn = {comma(n)}\n{percent(p,0.1)}", # outer_glue = "{.data[[outer]]}\n{str_replace_all(.data[[inner]], ' ', '\n')}\nn = {n}", # explode_outer = "(In|De)creased", explode_r = 0.3, # outer_label = "text", outer_min_p = 0, outer_max_p = 0.02 # ) ## ----add-donut-customised, eval=TRUE, echo=FALSE, fig.cap = "*Split-Donut chart exploding key regions and customising labels.*"---- knitr::include_graphics("differential_signal_sliding_files/figure-gfm/plot-donut-customised-1.png") ## ----bwfl, eval = FALSE------------------------------------------------------- # bwfl <- here::here( # "data", "H3K27ac", glue("{levels(samples$treatment)}_cov_chr10.bw") # ) %>% # BigWigFileList() %>% # setNames(levels(samples$treatment)) ## ----cytobands---------------------------------------------------------------- data("grch37.cytobands") head(grch37.cytobands) ## ----gr, eval = FALSE--------------------------------------------------------- # gr <- filter(tmm_results, hmp_fdr < 0.05, str_detect(bestOverlap, '^Prom'))[1] ## ----plot-hfgc, eval = FALSE-------------------------------------------------- # plotHFGC(gr, cytobands = grch37.cytobands, coverage = bwfl) ## ----cov-list, eval = FALSE--------------------------------------------------- # cov_list <- list(H3K27ac = bwfl) # cov_colour <- list(H3K27ac = treat_colours[levels(samples$treatment)]) # plotHFGC( # gr, cytobands = grch37.cytobands, # coverage = cov_list, linecol = cov_colour, zoom = 50, rotation.title = 90 # ) ## ----cov-annot, eval = FALSE-------------------------------------------------- # cov_annot <- splitAsList(tmm_results, tmm_results$status) %>% # endoapply(granges) # cov_annot <- list(H3K27ac = cov_annot) ## ----gene-models, eval = FALSE------------------------------------------------ # gene_models <- gencode$exon %>% # select( # type, gene = gene_id, exon = exon_id, transcript = transcript_id, # symbol = gene_name # ) ## ----plot-hfgc-with-annot, eval = FALSE--------------------------------------- # names(region_colours) <- names(regions) # plotHFGC( # gr, cytobands = grch37.cytobands, # features = regions, featcol = region_colours, featstack = "dense", # coverage = cov_list, linecol = cov_colour, # genes = gene_models, genecol = "wheat", collapseTranscripts = FALSE, # zoom = 350, shift = -1e5, # rotation.title = 90, covsize = 10, genesize = 1, featsize = 5, # annotation = cov_annot, annotcol = status_colours[c("Unchanged", "Increased")] # ) ## ----add-hfgc, eval=TRUE, echo=FALSE, fig.cap = "The addition of an annotation track for the coverage tracks shows which regions were retained during the analysis, as well as those which were considered as showing changed or unchanged signal"---- knitr::include_graphics("differential_signal_sliding_files/figure-gfm/plot-hfgc-with-annot-1.png") ## ----session-info------------------------------------------------------------- sessionInfo()