## ----echo=F-------------------------------------------------------------------
knitr::opts_chunk$set(fig.width=6, fig.height=4, cache=TRUE, autodep=TRUE)

# To examine objects:
# devtools::load_all(".", export_all=F) ; qwraps2::lazyload_cache_dir("vignettes/tail_length_cache/html")

## ----load, message=F----------------------------------------------------------
library(tidyverse)
library(reshape2)
library(SummarizedExperiment)
library(limma)
library(topconfects)
library(org.Sc.sgd.db)
library(weitrix)

# Produce consistent results
set.seed(12345)

# BiocParallel supports multiple backends. 
# If the default hangs or errors, try others.
BiocParallel::register( BiocParallel::SnowParam() )

# The most reliable backed is to use serial processing
#BiocParallel::register( BiocParallel::SerialParam() )

tail <- system.file("GSE83162", "tail.csv.gz", package="weitrix") %>%
    read_csv() %>%
    column_to_rownames("Feature") %>%
    as.matrix()

tail_count <- system.file("GSE83162", "tail_count.csv.gz", package="weitrix") %>%
    read_csv() %>%
    column_to_rownames("Feature") %>%
    as.matrix()
    
samples <- data.frame(sample=I(colnames(tail))) %>%
    extract(sample, c("strain","time"), c("(.+)-(.+)"), remove=FALSE) %>%
    mutate(
        strain=factor(strain,unique(strain)), 
        time=factor(time,unique(time)))
rownames(samples) <- colnames(tail)

samples

## ----weitrix, message=FALSE---------------------------------------------------
log2_tail <- log2(tail)

good <- rowMeans(tail_count) >= 10
table(good)

wei <- as_weitrix(
    log2_tail[good,,drop=FALSE], 
    weights=tail_count[good,,drop=FALSE])

rowData(wei)$gene <- AnnotationDbi::select(
    org.Sc.sgd.db, keys=rownames(wei), columns=c("GENENAME"))$GENENAME
rowData(wei)$total_reads <- rowSums(weitrix_weights(wei))
colData(wei) <- cbind(colData(wei), samples)

## ----cal----------------------------------------------------------------------
design <- model.matrix(~ strain + time, data=colData(wei))

## -----------------------------------------------------------------------------
cal_design <- weitrix_calibrate_trend(wei, design, ~splines::ns(log2(total_reads),3))

## ----disp-calc----------------------------------------------------------------
rowData(cal_design)$dispersion_unweighted <- weitrix_dispersions(weitrix_x(wei), design)

## ----disp-plot-unif-----------------------------------------------------------
rowData(cal_design) %>% as.data.frame() %>%
    ggplot(aes(x=total_reads, y=dispersion_unweighted)) + geom_point(size=0.1) +
    scale_x_log10() + scale_y_log10() +
    labs(title="Dispersion if uniform weights used (log scales)")

## ----disp-plot-weighted-------------------------------------------------------
rowData(cal_design) %>% as.data.frame() %>%
    ggplot(aes(x=total_reads, y=dispersion_before)) + geom_point(size=0.1) +
    geom_line(aes(y=dispersion_trend), color="red") +
    scale_x_log10() + scale_y_log10() +
    labs(title="Dispersion with read-counts as weights (log scales)")

## ----disp-plot-cal------------------------------------------------------------
rowData(cal_design) %>% as.data.frame() %>%
    ggplot(aes(x=total_reads, y=dispersion_after)) + geom_point(size=0.1) +
    scale_x_log10() + scale_y_log10() +
    labs(title="Dispersion with read-count weights / trend-based calibration\n(log scales)")

## ----limmadesign--------------------------------------------------------------
fit_cal_design <- cal_design %>%
    weitrix_elist() %>%
    lmFit(design)

ebayes_fit <- eBayes(fit_cal_design)
result_signif <- topTable(ebayes_fit, "strainDeltaSet1", n=Inf)

result_signif %>%
    dplyr::select(gene,diff_log2_tail=logFC,ave_log2_tail=AveExpr,
        adj.P.Val,total_reads) %>%
    head(20)

## ----confectsdesign-----------------------------------------------------------
result_confects <- limma_confects(
    fit_cal_design, "strainDeltaSet1", full=TRUE, fdr=0.05)

result_confects$table %>% 
    dplyr::select(gene,diff_log2_tail=effect,confect,total_reads) %>% 
    head(20)

cat(sum(!is.na(result_confects$table$confect)), 
    "genes significantly non-zero at FDR 0.05\n")

## ----examine, fig.show="hold", fig.width=4, fig.height=3----------------------
view_gene <- function(id, title="") {
    ggplot(samples, aes(x=time,color=strain,group=strain, y=tail[id,])) +
       geom_hline(yintercept=0) + 
       geom_line() + 
       geom_point(aes(size=tail_count[id,])) +
       labs(x="Time", y="Tail length", size="Read count", title=paste(id,title))
}

# Top "significant" genes
view_gene("YDR170W-A")
view_gene("YJR027W/YJR026W")
view_gene("YAR009C")
view_gene("YIL015W","BAR1")

# topconfects has highlighted some genes with lower total reads
view_gene("YER133W","GLC7")
view_gene("YCR014C","POL4")

## ----comp, message=F----------------------------------------------------------
comp_seq <- weitrix_components_seq(wei, p=10)

## ----scree--------------------------------------------------------------------
components_seq_screeplot(comp_seq)

## ----exam---------------------------------------------------------------------
comp <- comp_seq[[3]]

matrix_long(comp$col[,-1], row_info=samples, varnames=c("sample","component")) %>%
    ggplot(aes(x=time, y=value, color=strain, group=strain)) + 
    geom_hline(yintercept=0) + 
    geom_line() + 
    geom_point(alpha=0.75, size=3) + 
    facet_grid(component ~ .) +
    labs(title="Sample scores for each component", y="Sample score", x="Time", color="Strain")

## ----limmacomp----------------------------------------------------------------
cal_comp <- weitrix_calibrate_trend(wei, comp, ~splines::ns(log2(total_reads),3))
fit_comp <- cal_comp %>%
    weitrix_elist() %>%
    lmFit(comp$col)

## ----C1-----------------------------------------------------------------------
result_C1 <- limma_confects(fit_comp, "C1")

## ----examine_C1, echo=FALSE, fig.show="hold", fig.width=4, fig.height=3-------
result_C1$table %>% 
    dplyr::select(gene,loading=effect,confect,total_reads) %>% 
    head(10)

cat(sum(!is.na(result_C1$table$confect)), 
    "genes significantly non-zero at FDR 0.05\n")

view_gene("YBL016W", "FUS3")
view_gene("YOR096W", "RPS7A")
view_gene("YDR092W", "UBC1")
view_gene("YLR118C", "")

## ----C2-----------------------------------------------------------------------
result_C2 <- limma_confects(fit_comp, "C2")

## ----examine_C2, echo=FALSE, fig.show="hold", fig.width=4, fig.height=3-------
result_C2$table %>% 
    dplyr::select(gene,loading=effect,confect,total_reads) %>% 
    head(10)

cat(sum(!is.na(result_C2$table$confect)), 
    "genes significantly non-zero at FDR 0.05\n")

view_gene("YDR476C","")
view_gene("YPL131W","RPL5")
view_gene("YCR014C","POL4")
view_gene("YGR251W","NOP19")

## ----C3-----------------------------------------------------------------------
result_C3 <- limma_confects(fit_comp, "C3")

## ----examine_C3, echo=FALSE, fig.show="hold", fig.width=4, fig.height=3-------
result_C3$table %>% 
    dplyr::select(gene,loading=effect,confect,total_reads) %>% 
    head(10)

cat(sum(!is.na(result_C3$table$confect)), 
    "genes significantly non-zero at FDR 0.05\n")

view_gene("YFL014W", "HSP12")
view_gene("YNL036W",  "NCE103")
view_gene("YJL173C", "RFA3")
view_gene("YBL003C","HTA2")
view_gene("YDR461W", "MFA1")