## ----setup, echo=FALSE--------------------------------------------------------
knitr::opts_chunk$set(collapse=TRUE)

## ----eval = FALSE-------------------------------------------------------------
# if (!"BiocManager" %in% rownames(installed.packages()))
#      install.packages("BiocManager")
# BiocManager::install("BiocFileCache", dependencies=TRUE)

## ----results='hide', warning=FALSE, message=FALSE-----------------------------
library(BiocFileCache)

## -----------------------------------------------------------------------------
path <- tempfile()
bfc <- BiocFileCache(path, ask = FALSE)

## ----url----------------------------------------------------------------------
## paste to avoid long line in vignette
url <- paste(
    "ftp://ftp.ensembl.org/pub/release-71/gtf",
    "homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz",
    sep="/")

## ----eval=FALSE---------------------------------------------------------------
# library(BiocFileCache)
# bfc <- BiocFileCache()
# path <- bfcrpath(bfc, url)

## ----eval=FALSE---------------------------------------------------------------
# gtf <- rtracklayer::import.gff(path)

## ----eval=FALSE---------------------------------------------------------------
# gtf <- rtracklayer::import.gff(bfcrpath(BiocFileCache(), url))

## ----eval=FALSE---------------------------------------------------------------
# library(BiocFileCache)
# bfc <- BiocFileCache("~/my-experiment/results")

## ----eval=FALSE---------------------------------------------------------------
# suppressPackageStartupMessages({
#     library(DESeq2)
#     library(airway)
# })
# data(airway)
# dds <- DESeqDataData(airway, design = ~ cell + dex)
# result <- DESeq(dds)

## ----eval=FALSE---------------------------------------------------------------
# saveRDS(result, bfcnew(bfc, "airway / DESeq standard analysis"))

## ----eval=FALSE---------------------------------------------------------------
# result <- readRDS(bfcrpath(bfc, "airway / DESeq standard analysis"))

## ----eval=FALSE---------------------------------------------------------------
# suppressPackageStartupMessages({
#     library(BiocFileCache)
#     library(rtracklayer)
# })
# 
# # load the cache
# path <- file.path(tempdir(), "tempCacheDir")
# bfc <- BiocFileCache(path)
# 
# # the web resource of interest
# url <- "ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"
# 
# # check if url is being tracked
# res <- bfcquery(bfc, url, exact=TRUE)
# 
# if (bfccount(res) == 0L) {
# 
#     # if it is not in cache, add
#     ans <- bfcadd(bfc, rname="ensembl, homo sapien", fpath=url)
# 
# } else {
# 
#   # if it is in cache, get path to load
#   rid = res$rid
#   ans <- bfcrpath(bfc, rid)
# 
#   # check to see if the resource needs to be updated
#   check <- bfcneedsupdate(bfc, rid)
#   # check can be NA if it cannot be determined, choose how to handle
#   if (is.na(check)) check <- TRUE
#   if (check){
#     ans < - bfcdownload(bfc, rid)
#   }
# }
# 
# # ans is the path of the file to load
# ans
# 
# # we know because we search for the url that the file is a .gtf.gz,
# # if we searched on other terms we can use 'bfcpath' to see the
# # original fpath to know the appropriate load/read/import method
# bfcpath(bfc, names(ans))
# 
# temp = GTFFile(ans)
# info = import(temp)

## ----ensemblremote, eval=TRUE-------------------------------------------------

#
# A simpler test to see if something is in the cache
# and if not start tracking it is using `bfcrpath`
#

suppressPackageStartupMessages({
    library(BiocFileCache)
    library(rtracklayer)
})

# load the cache
path <- file.path(tempdir(), "tempCacheDir")
bfc <- BiocFileCache(path, ask=FALSE)

# the web resources of interest
url <- "ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"

url2 <- "ftp://ftp.ensembl.org/pub/release-71/gtf/rattus_norvegicus/Rattus_norvegicus.Rnor_5.0.71.gtf.gz"

# if not in cache will download and create new entry
pathsToLoad <- bfcrpath(bfc, c(url, url2))

pathsToLoad

# now load files as see fit
info = import(GTFFile(pathsToLoad[1]))
class(info)
summary(info)

## ----eval=FALSE---------------------------------------------------------------
# #
# # One could also imagine the following:
# #
# 
# library(BiocFileCache)
# 
# # load the cache
# bfc <- BiocFileCache()
# 
# #
# # Do some work!
# #
# 
# # add a location in the cache
# filepath <- bfcnew(bfc, "R workspace")
# 
# save(list = ls(), file=filepath)
# 
# # now the R workspace is being tracked in the cache

## ----eval=FALSE---------------------------------------------------------------
# .get_cache <-
#     function()
# {
#     cache <- tools::R_user_dir("MyNewPackage", which="cache")
#     BiocFileCache::BiocFileCache(cache)
# }

## ----eval=FALSE---------------------------------------------------------------
# download_data_file <-
#     function( verbose = FALSE )
# {
#     fileURL <- "http://a_path_to/someremotefile.tsv.gz"
# 
#     bfc <- .get_cache()
#     rid <- bfcquery(bfc, "geneFileV2", "rname")$rid
#     if (!length(rid)) {
# 	 if( verbose )
# 	     message( "Downloading GENE file" )
# 	 rid <- names(bfcadd(bfc, "geneFileV2", fileURL ))
#     }
#     if (!isFALSE(bfcneedsupdate(bfc, rid)))
# 	bfcdownload(bfc, rid)
# 
#     bfcrpath(bfc, rids = rid)
# }

## ----preprocess---------------------------------------------------------------
url <- "http://bioconductor.org/packages/stats/bioc/BiocFileCache/BiocFileCache_stats.tab"

headFile <-                         # how to process file before caching
    function(from, to)
{
    dat <- readLines(from)
    writeLines(head(dat), to)
    TRUE
}

rid <- bfcquery(bfc, url, "fpath")$rid
if (!length(rid))                   # not in cache, add but do not download
    rid <- names(bfcadd(bfc, url, download = FALSE))

update <- bfcneedsupdate(bfc, rid)  # TRUE if newly added or stale
if (!isFALSE(update))               # download & process
    bfcdownload(bfc, rid, ask = FALSE, FUN = headFile)

rpath <- bfcrpath(bfc, rids=rid)    # path to processed result
readLines(rpath)                    # read processed result

## ----sessioninfo--------------------------------------------------------------
sessionInfo()