## ----install, eval=FALSE------------------------------------------------------
#  if (!require("BiocManager"))
#      install.packages("BiocManager")
#  BiocManager::install("GenomicFiles")

## ----quick_start-load, results="hide", message=FALSE--------------------------
library(GenomicFiles)

## ----quick_start-ranges-------------------------------------------------------
gr <- GRanges("chr14", IRanges(c(19411500 + (1:5)*20), width=10))

## ----class-bam-data-----------------------------------------------------------
library(RNAseqData.HNRNPC.bam.chr14)
fls <- RNAseqData.HNRNPC.bam.chr14_BAMFILES

## ----quick_start-MAP----------------------------------------------------------
MAP <- function(range, file, ...) {
    requireNamespace("Rsamtools")
    Rsamtools::pileup(file, scanBamParam=Rsamtools::ScanBamParam(which=range))
} 

## ----quick_start-reduceByFile-------------------------------------------------
se <- reduceByFile(gr, fls, MAP, summarize=TRUE)
se 

## ----quick_start-assays-------------------------------------------------------
dim(assays(se)$data) ## ranges x files

## ----quick_start-MAP-REDUCE-reduceByRange-------------------------------------
REDUCE <- function(mapped, ...) {
    cmb = do.call(rbind, mapped)
    xtabs(count ~ pos + nucleotide, cmb)
}
lst <- reduceByRange(gr, fls, MAP, REDUCE, iterate=FALSE)

## ----quick_start-result-------------------------------------------------------
head(lst[[1]], 3)

## ----overview-GenomicFiles----------------------------------------------------
GenomicFiles(gr, fls)

## ----fig1, echo = FALSE, fig.cap = "Mechanics of `reduceByRange` and `reduceRanges`"----
knitr::include_graphics("reduceByRange_flow.png")

## ----fig2, echo = FALSE, fig.cap = "Mechanics of `reduceByFile` and `reduceFiles`"----
knitr::include_graphics("reduceByFile_flow.png")

## ----pileups-ranges-----------------------------------------------------------
gr <- GRanges("chr14", IRanges(c(19411677, 19659063, 105421963,
                                 105613740), width=20))

## ----pileups-MAP--------------------------------------------------------------
MAP <- function(range, file, ...) {
    requireNamespace("deepSNV")
    ct <- deepSNV::bam2R(file,
                         GenomeInfoDb::seqlevels(range),
                         GenomicRanges::start(range),
                         GenomicRanges::end(range), q=0)
    ct[, c("A", "T", "C", "G", "a", "t", "c", "g")]
}

## ----pileups-REDUCE-----------------------------------------------------------
REDUCE <- function(mapped, ...) {
    Reduce("+", mapped)
}

## ----pileups-reduceByRange, message=FALSE-------------------------------------
pile2 <- reduceByRange(gr, fls, MAP, REDUCE)
length(pile2)
elementNROWS(pile2)

## ----pileups-res, warning=FALSE-----------------------------------------------
head(pile2[[1]])

## ----ttest-ranges, warning=FALSE----------------------------------------------
roi <- GRanges("chr14", IRanges(c(19411677, 19659063, 105421963,
               105613740), width=20))

## ----ttest-group--------------------------------------------------------------
grp <- factor(rep(c("A","B"), each=length(fls)/2))

## ----ttest-MAP----------------------------------------------------------------
MAP <- function(range, file, ...) {
    requireNamespace("GenomicAlignments")
    param <- Rsamtools::ScanBamParam(which=range)
    as.numeric(unlist(
        GenomicAlignments::coverage(file, param=param)[range], use.names=FALSE))
    }

## ----ttest-REDUCE-------------------------------------------------------------
REDUCE <- function(mapped, ..., grp) {
    mat = simplify2array(mapped)
    idx = which(rowSums(mat) != 0)
    df = genefilter::rowttests(mat[idx,], grp)
    cbind(offset = idx - 1, df)
}

## ----ttest-results, eval=FALSE------------------------------------------------
#  ttest <- reduceByRange(roi, fls, MAP, REDUCE, iterate=FALSE, grp=grp)

## ----junctions-ranges---------------------------------------------------------
gr <- GRanges("chr14", IRanges(c(19100000, 106000000), width=1e7))

## ----junctions-MAP------------------------------------------------------------
MAP <- function(range, file, ...) {
     requireNamespace("GenomicAlignments") ## for readGAlignments()
                                           ## ScanBamParam()
     param = Rsamtools::ScanBamParam(which=range)
     gal = GenomicAlignments::readGAlignments(file, param=param)
     table(GenomicAlignments::njunc(gal))
}

## ----junctions-GenomicFiles---------------------------------------------------
gf <- GenomicFiles(gr, fls)
gf

## ----junctions-counts1--------------------------------------------------------
counts1 <- reduceByFile(gf[,1:3], MAP=MAP)
length(counts1) ## 3 files
elementNROWS(counts1) ## 2 ranges

## ----junctions-counts1-show---------------------------------------------------
counts1[[1]]

## ----junctions-REDUCE---------------------------------------------------------
REDUCE <- function(mapped, ...)
    sum(sapply(mapped, "[", "1"))
    reduceByFile(gr, fls, MAP, REDUCE)

## ----junctions-counts2--------------------------------------------------------
counts2 <- reduceFiles(gf[,1:3], MAP=MAP)

## ----junctions-counts2-show---------------------------------------------------
## reduceFiles returns counts for all ranges.
counts2[[1]]
## reduceByFile returns counts for each range separately.
counts1[[1]]

## ----coverage1-tiles----------------------------------------------------------
chr14_seqlen <- seqlengths(seqinfo(BamFileList(fls))["chr14"])
tiles <- tileGenome(chr14_seqlen, ntile=5)

## ----coverage1-tiles-show-----------------------------------------------------
tiles

## ----coverage1-MAP------------------------------------------------------------
MAP = function(range, file, ...) {
    requireNamespace("GenomicAlignments") ## for ScanBamParam() and coverage()
    param = Rsamtools::ScanBamParam(which=range)
    rle = GenomicAlignments::coverage(file, param=param)[range]
    c(width = GenomicRanges::width(range),
      sum = sum(S4Vectors::runLength(rle) * S4Vectors::runValue(rle)))
}

## ----coverage1-REDUCE---------------------------------------------------------
REDUCE = function(mapped, ...) {
    Reduce(function(i, j) Map("+", i, j), mapped)
}

## ----coverage1-results, eval=FALSE--------------------------------------------
#  cvg1 <- reduceByFile(tiles, fls, MAP, REDUCE, iterate=TRUE)

## ----coverage2-MAP------------------------------------------------------------
MAP = function(range, file, ...) {
    requireNamespace("GenomicAlignments")  ## for ScanBamParam() and coverage()
    GenomicAlignments::coverage(
        file,
        param=Rsamtools::ScanBamParam(which=range))[range]
}

## ----coverage2-REDUCE---------------------------------------------------------
REDUCE = function(mapped, ...) {
    sapply(mapped, Reduce, f = "+")
}

## ----coverage2-results--------------------------------------------------------
cvg2 <- reduceFiles(unlist(tiles), fls, MAP, REDUCE)

## ----coverage2-show-----------------------------------------------------------
cvg2[1]

## ----coverage3-MAP------------------------------------------------------------
MAP = function(range, file, ...) {
    requireNamespace("BiocParallel")  ## for bplapply()
    nranges = 2
    idx = split(seq_along(range), ceiling(seq_along(range)/nranges))
    BiocParallel::bplapply(idx, 
        function(i, range, file) {
            requireNamespace("GenomicAlignments")  ## ScanBamParam(), coverage()
            chunk = range[i]
            param = Rsamtools::ScanBamParam(which=chunk)
            cvg = GenomicAlignments::coverage(file, param=param)[chunk]
            Reduce("+", cvg)            ## collapse coverage within chunks
        }, range, file)
}

## ----coverage3-REDUCE---------------------------------------------------------
REDUCE = function(mapped, ...) {
    sapply(mapped, Reduce, f = "+")
}

## ----coverage3-Results, eval=FALSE--------------------------------------------
#  cvg3 <- reduceFiles(unlist(tiles), fls, MAP, REDUCE)

## ----reduceByYield-YIELD------------------------------------------------------
library(GenomicAlignments)
bf <- BamFile(fls[1], yieldSize=100000)
YIELD <- function(x, ...) readGAlignments(x)

## ----reduceByYield-MAP-REDUCE-------------------------------------------------
gr <- unlist(tiles, use.names=FALSE)
MAP <- function(value, gr, ...) {
    requireNamespace("GenomicRanges") ## for countOverlaps()
    GenomicRanges::countOverlaps(gr, value)
    }
REDUCE <- `+`

## ----reduceByYield-DONE-------------------------------------------------------
DONE <- function(value) length(value) == 0L

## ----reduceByYield-bplapply---------------------------------------------------
FUN <- function(file, gr, YIELD, MAP, REDUCE, tiles, ...) {
    requireNamespace("GenomicAlignments") ## for BamFile, readGAlignments()
    requireNamespace("GenomicFiles") ## for reduceByYield()
    gr <- unlist(tiles, use.names=FALSE)
    bf <- Rsamtools::BamFile(file, yieldSize=100000)
    YIELD <- function(x, ...) GenomicAlignments::readGAlignments(x)
    MAP <- function(value, gr, ...) {
        requireNamespace("GenomicRanges") ## for countOverlaps()
        GenomicRanges::countOverlaps(gr, value)
    }
    REDUCE <- `+`
    GenomicFiles::reduceByYield(bf, YIELD, MAP, REDUCE, gr=gr, parallel=FALSE)
}

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()