## ---- echo=FALSE, results="hide", message=FALSE----------------------------
require(knitr)
opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE)

## ----style, echo=FALSE, results='asis'-------------------------------------
BiocStyle::markdown()

## --------------------------------------------------------------------------
library(TENxBrainData)
tenx <- TENxBrainData()
tenx

## --------------------------------------------------------------------------
counts(tenx)

## --------------------------------------------------------------------------
options(DelayedArray.block.size=2e9)

## ---- eval = FALSE---------------------------------------------------------
#  lib.sizes <- colSums(counts(tenx))
#  n.exprs <- colSums(counts(tenx) != 0L)
#  ave.exprs <- rowMeans(counts(tenx))

## --------------------------------------------------------------------------
tenx20k <- tenx[, seq_len(20000)]
chunksize <- 5000
cidx <- snow::splitIndices(ncol(tenx20k), ncol(tenx20k) / chunksize)

## --------------------------------------------------------------------------
lib.sizes <- n.exprs <- numeric(ncol(tenx20k))
tot.exprs <- numeric(nrow(tenx20k))
for (i in head(cidx, 2)) {
    message(".", appendLF=FALSE)
    m <- as.matrix(counts(tenx20k)[,i, drop=FALSE])
    lib.sizes[i] <- colSums(m)
    n.exprs[i] <- colSums(m != 0)
    tot.exprs <- tot.exprs + rowSums(m)
    }
ave.exprs <- tot.exprs / ncol(tenx20k)

## --------------------------------------------------------------------------
colData(tenx20k)$lib.sizes <- lib.sizes
colData(tenx20k)$n.exprs <- n.exprs
rowData(tenx20k)$ave.exprs <- ave.exprs

## --------------------------------------------------------------------------
hist(
    log10(colData(tenx20k)$lib.sizes),
    xlab=expression(Log[10] ~ "Library size"),
    col="grey80"
)

## --------------------------------------------------------------------------
hist(colData(tenx20k)$n.exprs, xlab="Number of detected genes", col="grey80")

## --------------------------------------------------------------------------
hist(
    log10(rowData(tenx20k)$ave.exprs),
    xlab=expression(Log[10] ~ "Average count"),
    col="grey80"
)

## --------------------------------------------------------------------------
o <- order(rowData(tenx20k)$ave.exprs, decreasing=TRUE)
head(rowData(tenx20k)[o,])

## ---- eval=FALSE-----------------------------------------------------------
#  destination <- tempfile()
#  saveRDS(tenx, file = destination)

## --------------------------------------------------------------------------
library(BiocParallel)
register(bpstart(SnowParam(5)))

## --------------------------------------------------------------------------
iterator <- function(tenx, cols_per_chunk = 5000, n = Inf) {
    start <- seq(1, ncol(tenx), by = cols_per_chunk)
    end <- c(tail(start, -1) - 1L, ncol(tenx))
    n <- min(n, length(start))
    i <- 0L
    function() {
        if (i == n)
            return(NULL)
        i <<- i + 1L
        c(start[i], end[i])
    }
}

## --------------------------------------------------------------------------
iter <- iterator(tenx)
iter()
iter()
iter()

## --------------------------------------------------------------------------
fun <- function(crng, counts, ...) {
    ## `fun()` needs to be self-contained for some parallel back-ends
    suppressPackageStartupMessages({
        library(TENxBrainData)
    })
    m <- as.matrix( counts[ , seq(crng[1], crng[2]) ] )
    list(
        row = list(
            n = rowSums(m != 0), sum = rowSums(m), sumsq = rowSums(m * m)
        ),
        column = list(
            n = colSums(m != 0), sum = colSums(m), sumsq = colSums(m * m)
        )
    )
}

## --------------------------------------------------------------------------
res <- fun( iter(), unname(counts(tenx)) )
str(res)

## --------------------------------------------------------------------------
reduce <- function(x, y) {
    list(
        row = Map(`+`, x$row, y$row),
        column = Map(`c`, x$column, y$column)
    )
}

## --------------------------------------------------------------------------
str( reduce(res, res) )

## --------------------------------------------------------------------------
res <- bpiterate(
    iterator(tenx, n = 5), fun, counts = unname(counts(tenx)), 
    REDUCE = reduce, reduce.in.order = TRUE
)
str(res)

## --------------------------------------------------------------------------
library(ExperimentHub)
hub <- ExperimentHub()
query(hub, "TENxBrainData")
fname <- hub[["EH1039"]]

## --------------------------------------------------------------------------
h5ls(fname)

## --------------------------------------------------------------------------
start <- h5read(fname, "/mm10/indptr", start=1, count=25001)
head(start)

## --------------------------------------------------------------------------
library(data.table)
dt <- data.table(
    row = h5read(fname, "/mm10/indices", start = 1, count = tail(start, 1)) + 1,
    column = rep(seq_len(length(start) - 1), diff(start)),
    count = h5read(fname, "/mm10/data", start = 1, count = tail(start, 1))
)
dt

## --------------------------------------------------------------------------
dt[ , 
    list(n = .N, sum = sum(count), sumsq = sum(count * count)),
    keyby=row]
dt[ , 
    list(n = .N, sum = sum(count), sumsq = sum(count * count)),
    keyby=column]

## --------------------------------------------------------------------------
sessionInfo()