--- output: html_document: toc: true theme: united knitrBootstrap::bootstrap_document: theme.chooser: TRUE highlight.chooser: TRUE --- Introduction to `derfinderData` =============================== If you wish, you can view this vignette online [here](http://lcolladotor.github.io/derfinderData/). ```{r vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE} ## Track time spent on making the vignette startTime <- Sys.time() ## Bib setup library('knitcitations') ## Load knitcitations with a clean bibliography cleanbib() cite_options(hyperlink = 'to.doc', citation_format = 'text', style = 'html') # Note links won't show for now due to the following issue # https://github.com/cboettig/knitcitations/issues/63 ## Write bibliography information write.bibtex(c(knitcitations = citation('knitcitations'), brainspan = RefManageR::BibEntry(bibtype = 'Unpublished', key = 'brainspan', title = 'Atlas of the Developing Human Brain [Internet]. Funded by ARRA Awards 1RC2MH089921-01, 1RC2MH090047-01, and 1RC2MH089929-01.', author = 'BrainSpan', year = 2011, url = 'http://developinghumanbrain.org'), knitrBootstrap = citation('knitrBootstrap'), knitr = citation('knitr')[3], rmarkdown = citation('rmarkdown')), file = 'derfinderDataRef.bib') bib <- read.bibtex('derfinderDataRef.bib') ## Assign short names names(bib) <- c('knitcitations', 'brainspan', 'knitrBootstrap', 'knitr', 'rmarkdown') ``` # Overview `derfinderData` is a small data package with information extracted from _BrainSpan_ (see [here](http://download.alleninstitute.org/brainspan/MRF_BigWig_Gencode_v10/bigwig/)) `r citep(bib[['brainspan']])` for 24 samples restricted to chromosome 21. The BigWig files in this package can then be used by other packages for examples, such as in `derfinder` and `derfinderPlot`. While you could download the data from _BrainSpan_ `r citep(bib[['brainspan']])`, this package is helpful for scenarios where you might encounter some difficulties such as the one described in this [thread](https://stat.ethz.ch/pipermail/bioc-devel/2014-September/006329.html). # Data The following code builds the phenotype table included in `derfinderData`. For two randomly selected structures, 12 samples were chosen with 6 of them being fetal samples and the other 6 coming from adult individuals. For the fetal samples, the age in PCW is transformed into age in years by age_in_years = (age_in_PCW - 40) / 52 In other data sets you might want to subtract 42 instead of 40 if some observations have PCW up to 42. ```{r 'pheno'} ## Construct brainspanPheno table brainspanPheno <- data.frame( gender = c('F', 'M', 'M', 'M', 'F', 'F', 'F', 'M', 'F', 'M', 'M', 'F', 'M', 'M', 'M', 'M', 'F', 'F', 'F', 'M', 'F', 'M', 'M', 'F'), lab = c('HSB97.AMY', 'HSB92.AMY', 'HSB178.AMY', 'HSB159.AMY', 'HSB153.AMY', 'HSB113.AMY', 'HSB130.AMY', 'HSB136.AMY', 'HSB126.AMY', 'HSB145.AMY', 'HSB123.AMY', 'HSB135.AMY', 'HSB114.A1C', 'HSB103.A1C', 'HSB178.A1C', 'HSB154.A1C', 'HSB150.A1C', 'HSB149.A1C', 'HSB130.A1C', 'HSB136.A1C', 'HSB126.A1C', 'HSB145.A1C', 'HSB123.A1C', 'HSB135.A1C'), Age = c(-0.442307692307693, -0.365384615384615, -0.461538461538461, -0.307692307692308, -0.538461538461539, -0.538461538461539, 21, 23, 30, 36, 37, 40, -0.519230769230769, -0.519230769230769, -0.461538461538461, -0.461538461538461, -0.538461538461539, -0.519230769230769, 21, 23, 30, 36, 37, 40) ) brainspanPheno$structure_acronym <- rep(c('AMY', 'A1C'), each = 12) brainspanPheno$structure_name <- rep(c('amygdaloid complex', 'primary auditory cortex (core)'), each = 12) brainspanPheno$file <- paste0('http://download.alleninstitute.org/brainspan/MRF_BigWig_Gencode_v10/bigwig/', brainspanPheno$lab, '.bw') brainspanPheno$group <- factor(ifelse(brainspanPheno$Age < 0, 'fetal', 'adult'), levels = c('fetal', 'adult')) ``` We can then save the phenotype information, which is included in `derfinderData`. ```{r 'savePheno', eval = FALSE} ## Save pheno table save(brainspanPheno, file = 'brainspanPheno.RData') ``` Here is how the data looks like: ```{r 'explorePheno', results = 'asis', bootstrap.show.code = FALSE} library('knitr') ## Explore pheno p <- brainspanPheno[, -which(colnames(brainspanPheno) %in% c('structure_acronym', 'structure_name', 'file'))] kable(p, format = 'html', row.names = TRUE) ``` We can verify that this is indeed the information included in `derfinderData`. ```{r 'verifyPheno'} ## Rename our newly created pheno data newPheno <- brainspanPheno ## Load the included data library('derfinderData') ## Verify identical(newPheno, brainspanPheno) ``` Using the phenotype information, you can use `derfinder` to extract the base-level coverage information for chromosome 21 from these samples. Then, you can export the data to BigWig files. ```{r 'getData', eval = FALSE} library('derfinder') ## Determine the files to use and fix the names files <- brainspanPheno$file names(files) <- gsub('.AMY|.A1C', '', brainspanPheno$lab) ## Load the data system.time(fullCovAMY <- fullCoverage( files = files[brainspanPheno$structure_acronym == 'AMY'], chrs = 'chr21')) #user system elapsed #4.505 0.178 37.676 system.time(fullCovA1C <- fullCoverage( files = files[brainspanPheno$structure_acronym == 'A1C'], chrs = 'chr21')) #user system elapsed #2.968 0.139 27.704 ## Write BigWig files dir.create('AMY') system.time(createBw(fullCovAMY, path = 'AMY', keepGR = FALSE)) #user system elapsed #5.749 0.332 6.045 dir.create('A1C') system.time(createBw(fullCovA1C, path = 'A1C', keepGR = FALSE)) #user system elapsed #5.025 0.299 5.323 ## Check that 12 files were created in each directory all(c(length(dir('AMY')), length(dir('A1C'))) == 12) #TRUE ## Save data for examples running on Windows save(fullCovAMY, file = 'fullCovAMY.RData') save(fullCovA1C, file = 'fullCovA1C.RData') ``` These BigWig files are available under _extdata_ as shown below: ```{r 'findData'} ## Find AMY BigWigs dir(system.file('extdata', 'AMY', package = 'derfinderData')) ## Find A1C BigWigs dir(system.file('extdata', 'A1C', package = 'derfinderData')) ``` # Reproducibility Code for creating the vignette ```{r createVignette, eval=FALSE, bootstrap.show.code=FALSE} ## Create the vignette library('knitrBootstrap') knitrBootstrapFlag <- packageVersion('knitrBootstrap') < '1.0.0' if(knitrBootstrapFlag) { ## CRAN version library('knitrBootstrap') system.time(knit_bootstrap('derfinderData.Rmd', chooser=c('boot', 'code'), show_code = TRUE)) unlink('derfinderData.md') } else { ## GitHub version library('rmarkdown') system.time(render('derfinderData.Rmd', 'knitrBootstrap::bootstrap_document')) } ## Note: if you prefer the knitr version use: # library('rmarkdown') # system.time(render('derfinder.Rmd', 'html_document')) ## Extract the R code library('knitr') knit('derfinderData.Rmd', tangle = TRUE) ## Clean up file.remove('derfinderDataRef.bib') ``` Date the vignette was generated. ```{r reproducibility1, echo=FALSE, bootstrap.show.code=FALSE} ## Date the vignette was generated Sys.time() ``` Wallclock time spent generating the vignette. ```{r reproducibility2, echo=FALSE, bootstrap.show.code=FALSE} ## Processing time in seconds totalTime <- diff(c(startTime, Sys.time())) round(totalTime, digits=3) ``` `R` session information. ```{r reproducibility3, echo=FALSE, bootstrap.show.code=FALSE, bootstrap.show.message=FALSE} ## Session info library('sessioninfo') session_info() ``` # Bibliography This vignette was generated using `knitrBootstrap` `r citep(bib[['knitrBootstrap']])` with `knitr` `r citep(bib[['knitr']])` and `rmarkdown` `r citep(bib[['rmarkdown']])` running behind the scenes. Citations made with `knitcitations` `r citep(bib[['knitcitations']])`. ```{r vignetteBiblio, results='asis', echo=FALSE} ## Print bibliography bibliography() ```