## ----echo=F------------------------------------------------------------------- knitr::opts_chunk$set(fig.width=6, fig.height=4, cache=TRUE, autodep=TRUE) # To examine objects: # devtools::load_all(".", export_all=F) ; qwraps2::lazyload_cache_dir("vignettes/tail_length_cache/html") ## ----load, message=F---------------------------------------------------------- library(tidyverse) library(reshape2) library(SummarizedExperiment) library(limma) library(topconfects) library(org.Sc.sgd.db) library(weitrix) # Produce consistent results set.seed(12345) # BiocParallel supports multiple backends. # If the default hangs or errors, try others. BiocParallel::register( BiocParallel::SnowParam() ) # The most reliable backed is to use serial processing #BiocParallel::register( BiocParallel::SerialParam() ) tail <- system.file("GSE83162", "tail.csv.gz", package="weitrix") %>% read_csv() %>% column_to_rownames("Feature") %>% as.matrix() tail_count <- system.file("GSE83162", "tail_count.csv.gz", package="weitrix") %>% read_csv() %>% column_to_rownames("Feature") %>% as.matrix() samples <- data.frame(sample=I(colnames(tail))) %>% extract(sample, c("strain","time"), c("(.+)-(.+)"), remove=FALSE) %>% mutate( strain=factor(strain,unique(strain)), time=factor(time,unique(time))) rownames(samples) <- colnames(tail) samples ## ----weitrix, message=FALSE--------------------------------------------------- log2_tail <- log2(tail) good <- rowMeans(tail_count) >= 10 table(good) wei <- as_weitrix( log2_tail[good,,drop=FALSE], weights=tail_count[good,,drop=FALSE]) rowData(wei)$gene <- AnnotationDbi::select( org.Sc.sgd.db, keys=rownames(wei), columns=c("GENENAME"))$GENENAME rowData(wei)$total_reads <- rowSums(weitrix_weights(wei)) colData(wei) <- cbind(colData(wei), samples) ## ----cal---------------------------------------------------------------------- design <- model.matrix(~ strain + time, data=colData(wei)) ## ----------------------------------------------------------------------------- cal_design <- weitrix_calibrate_trend(wei, design, ~splines::ns(log2(total_reads),3)) ## ----disp-calc---------------------------------------------------------------- rowData(cal_design)$dispersion_unweighted <- weitrix_dispersions(weitrix_x(wei), design) ## ----disp-plot-unif----------------------------------------------------------- rowData(cal_design) %>% as.data.frame() %>% ggplot(aes(x=total_reads, y=dispersion_unweighted)) + geom_point(size=0.1) + scale_x_log10() + scale_y_log10() + labs(title="Dispersion if uniform weights used (log scales)") ## ----disp-plot-weighted------------------------------------------------------- rowData(cal_design) %>% as.data.frame() %>% ggplot(aes(x=total_reads, y=dispersion_before)) + geom_point(size=0.1) + geom_line(aes(y=dispersion_trend), color="red") + scale_x_log10() + scale_y_log10() + labs(title="Dispersion with read-counts as weights (log scales)") ## ----disp-plot-cal------------------------------------------------------------ rowData(cal_design) %>% as.data.frame() %>% ggplot(aes(x=total_reads, y=dispersion_after)) + geom_point(size=0.1) + scale_x_log10() + scale_y_log10() + labs(title="Dispersion with read-count weights / trend-based calibration\n(log scales)") ## ----limmadesign-------------------------------------------------------------- fit_cal_design <- cal_design %>% weitrix_elist() %>% lmFit(design) ebayes_fit <- eBayes(fit_cal_design) result_signif <- topTable(ebayes_fit, "strainDeltaSet1", n=Inf) result_signif %>% dplyr::select(gene,diff_log2_tail=logFC,ave_log2_tail=AveExpr, adj.P.Val,total_reads) %>% head(20) ## ----confectsdesign----------------------------------------------------------- result_confects <- limma_confects( fit_cal_design, "strainDeltaSet1", full=TRUE, fdr=0.05) result_confects$table %>% dplyr::select(gene,diff_log2_tail=effect,confect,total_reads) %>% head(20) cat(sum(!is.na(result_confects$table$confect)), "genes significantly non-zero at FDR 0.05\n") ## ----examine, fig.show="hold", fig.width=4, fig.height=3---------------------- view_gene <- function(id, title="") { ggplot(samples, aes(x=time,color=strain,group=strain, y=tail[id,])) + geom_hline(yintercept=0) + geom_line() + geom_point(aes(size=tail_count[id,])) + labs(x="Time", y="Tail length", size="Read count", title=paste(id,title)) } # Top "significant" genes view_gene("YDR170W-A") view_gene("YJR027W/YJR026W") view_gene("YAR009C") view_gene("YIL015W","BAR1") # topconfects has highlighted some genes with lower total reads view_gene("YER133W","GLC7") view_gene("YCR014C","POL4") ## ----comp, message=F---------------------------------------------------------- comp_seq <- weitrix_components_seq(wei, p=10) ## ----scree-------------------------------------------------------------------- components_seq_screeplot(comp_seq) ## ----exam--------------------------------------------------------------------- comp <- comp_seq[[3]] matrix_long(comp$col[,-1], row_info=samples, varnames=c("sample","component")) %>% ggplot(aes(x=time, y=value, color=strain, group=strain)) + geom_hline(yintercept=0) + geom_line() + geom_point(alpha=0.75, size=3) + facet_grid(component ~ .) + labs(title="Sample scores for each component", y="Sample score", x="Time", color="Strain") ## ----limmacomp---------------------------------------------------------------- cal_comp <- weitrix_calibrate_trend(wei, comp, ~splines::ns(log2(total_reads),3)) fit_comp <- cal_comp %>% weitrix_elist() %>% lmFit(comp$col) ## ----C1----------------------------------------------------------------------- result_C1 <- limma_confects(fit_comp, "C1") ## ----examine_C1, echo=FALSE, fig.show="hold", fig.width=4, fig.height=3------- result_C1$table %>% dplyr::select(gene,loading=effect,confect,total_reads) %>% head(10) cat(sum(!is.na(result_C1$table$confect)), "genes significantly non-zero at FDR 0.05\n") view_gene("YBL016W", "FUS3") view_gene("YOR096W", "RPS7A") view_gene("YDR092W", "UBC1") view_gene("YLR118C", "") ## ----C2----------------------------------------------------------------------- result_C2 <- limma_confects(fit_comp, "C2") ## ----examine_C2, echo=FALSE, fig.show="hold", fig.width=4, fig.height=3------- result_C2$table %>% dplyr::select(gene,loading=effect,confect,total_reads) %>% head(10) cat(sum(!is.na(result_C2$table$confect)), "genes significantly non-zero at FDR 0.05\n") view_gene("YDR476C","") view_gene("YPL131W","RPL5") view_gene("YCR014C","POL4") view_gene("YGR251W","NOP19") ## ----C3----------------------------------------------------------------------- result_C3 <- limma_confects(fit_comp, "C3") ## ----examine_C3, echo=FALSE, fig.show="hold", fig.width=4, fig.height=3------- result_C3$table %>% dplyr::select(gene,loading=effect,confect,total_reads) %>% head(10) cat(sum(!is.na(result_C3$table$confect)), "genes significantly non-zero at FDR 0.05\n") view_gene("YFL014W", "HSP12") view_gene("YNL036W", "NCE103") view_gene("YJL173C", "RFA3") view_gene("YBL003C","HTA2") view_gene("YDR461W", "MFA1")