Use the Data in this Data Package

Introduction

This data package contains code to generate various filtered forms of a normalised dataset by downloading, from DEE2, the accessions listed in inst/hsapiens_colData_transitions_v3.5.csv for the species “hsapiens”. Because the resulting data files were too large to ship inside the data package itself, the package also provides a way to fetch, via AnnotationHub, the data that was pregenerated at the initial package development time; this helps both reproducibility and ease of use.

Installation

We assume you already have R installed and configured. If you do not have Bioconductor installed, you can install it like so:

# Install BiocManager from CRAN only when it is not already available,
# then run BiocManager::install() with no arguments.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install()

Then, if you have not yet installed this package and its dependencies from Bioconductor, you can install them like so:

# Install this package together with every package that the Setup section
# attaches with library(), so that none of those calls fails on a fresh
# installation. DESeq2, S4Vectors and Biobase are listed explicitly rather
# than relying on them being pulled in as dependencies of the other packages.
BiocManager::install(c(
  "CellScore", "homosapienDEE2CellScore", "devtools", "getDEE2",
  "SummarizedExperiment", "DESeq2", "S4Vectors", "Biobase"
))

Setup

In order to make the needed libraries accessible, you will need the following setup:

# Attach DESeq2; the lines prefixed with #> that follow are the startup
# messages printed when this call was run.
library(DESeq2)
#> Loading required package: S4Vectors
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> 
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:stats':
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#> 
#>     Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
#>     as.data.frame, basename, cbind, colnames, dirname, do.call,
#>     duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
#>     lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#>     pmin.int, rank, rbind, rownames, sapply, setdiff, table, tapply,
#>     union, unique, unsplit, which.max, which.min
#> 
#> Attaching package: 'S4Vectors'
#> The following object is masked from 'package:utils':
#> 
#>     findMatches
#> The following objects are masked from 'package:base':
#> 
#>     I, expand.grid, unname
#> Loading required package: IRanges
#> Loading required package: GenomicRanges
#> Loading required package: GenomeInfoDb
#> Loading required package: SummarizedExperiment
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#> 
#> Attaching package: 'MatrixGenerics'
#> The following objects are masked from 'package:matrixStats':
#> 
#>     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#>     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#>     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#>     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#>     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#>     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#>     colWeightedMeans, colWeightedMedians, colWeightedSds,
#>     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#>     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#>     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#>     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#>     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#>     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#>     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#>     rowWeightedSds, rowWeightedVars
#> Loading required package: Biobase
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
#> 
#> Attaching package: 'Biobase'
#> The following object is masked from 'package:MatrixGenerics':
#> 
#>     rowMedians
#> The following objects are masked from 'package:matrixStats':
#> 
#>     anyMissing, rowMedians
# Attach the remaining packages used in this document. The order is kept
# as-is because attach order determines which package masks which name.
library(S4Vectors)
library(Biobase)
library(SummarizedExperiment)
library(getDEE2)
library(devtools)
#> Loading required package: usethis
library(CellScore)
library(homosapienDEE2CellScore)

Getting and using the data

Every combination of filtering and normalisation of the data can be downloaded as a named list like so:

# Fetch every pregenerated data set and keep them together in `the_data`.
the_data <- downloadAllTheData()
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache
#> see ?homosapienDEE2CellScore and browseVignettes('homosapienDEE2CellScore') for documentation
#> loading from cache

We can then select the data we are after (the raw, non-normalised data, including samples with quality control warnings) and use it in CellScore to calculate the on/off score for cell transitions from fibroblasts to embryonic stem cells:

# Select the raw data set that keeps samples flagged with quality-control
# warnings.
sm <- the_data$HomosapienDEE2_QC_WARN_Raw
## To fetch only this data set instead of downloading everything, a call such as
## `sm <- homosapienDEE2CellScore::readInSEZip(homosapienDEE2CellScore::HomosapienDEE2_QC_WARN_Raw())`
## should work (accessor name assumed to mirror the list element name; verify
## against the package's exported functions).

# Calculate the on/off score for the fibroblast -> embryonic stem cell
# transition from the raw data: take the samples whose category is "test"
# and those whose category is "standard", bind them together, and pass them
# to OnOff() along with the start/test/target cell types.
test1 <- sm[, sm$category == "test"]
standard <- sm[, sm$category == "standard"]
sm1 <- cbind(test1, standard)
cell.change <- data.frame(start = "FIB", test = "nESC", target = "ESC")
group.OnOff <- OnOff(sm1, cell.change, out.put = "marker.list")
#> Warning in .calculateGroupOnOff(score.comparisons, calls, pdata, annot, :
#> Multiple array platforms exist in the phenotype data.

Session Info

# Print the R version, platform, locale and package versions in use, so the
# results above can be reproduced.
sessionInfo()
#> R version 4.4.0 beta (2024-04-15 r86425)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.4 LTS
#> 
#> Matrix products: default
#> BLAS:   /home/biocbuild/bbs-3.19-bioc/R/lib/libRblas.so 
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_GB              LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: America/New_York
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] homosapienDEE2CellScore_1.0.0 CellScore_1.24.0             
#>  [3] devtools_2.4.5                usethis_2.2.3                
#>  [5] getDEE2_1.14.0                DESeq2_1.44.0                
#>  [7] SummarizedExperiment_1.34.0   Biobase_2.64.0               
#>  [9] MatrixGenerics_1.16.0         matrixStats_1.3.0            
#> [11] GenomicRanges_1.56.0          GenomeInfoDb_1.40.0          
#> [13] IRanges_2.38.0                S4Vectors_0.42.0             
#> [15] BiocGenerics_0.50.0          
#> 
#> loaded via a namespace (and not attached):
#>   [1] DBI_1.2.2               bitops_1.0-7            remotes_2.5.0          
#>   [4] rlang_1.1.3             magrittr_2.0.3          compiler_4.4.0         
#>   [7] RSQLite_2.3.6           png_0.1-8               vctrs_0.6.5            
#>  [10] htm2txt_2.2.2           stringr_1.5.1           profvis_0.3.8          
#>  [13] pkgconfig_2.0.3         crayon_1.5.2            fastmap_1.1.1          
#>  [16] dbplyr_2.5.0            XVector_0.44.0          ellipsis_0.3.2         
#>  [19] caTools_1.18.2          utf8_1.2.4              promises_1.3.0         
#>  [22] rmarkdown_2.26          sessioninfo_1.2.2       UCSC.utils_1.0.0       
#>  [25] bit_4.0.5               purrr_1.0.2             xfun_0.43              
#>  [28] zlibbioc_1.50.0         cachem_1.0.8            jsonlite_1.8.8         
#>  [31] blob_1.2.4              SnowballC_0.7.1         later_1.3.2            
#>  [34] DelayedArray_0.30.0     BiocParallel_1.38.0     parallel_4.4.0         
#>  [37] R6_2.5.1                bslib_0.7.0             stringi_1.8.3          
#>  [40] RColorBrewer_1.1-3      pkgload_1.3.4           jquerylib_0.1.4        
#>  [43] Rcpp_1.0.12             knitr_1.46              httpuv_1.6.15          
#>  [46] Matrix_1.7-0            tidyselect_1.2.1        abind_1.4-5            
#>  [49] yaml_2.3.8              gplots_3.1.3.1          codetools_0.2-20       
#>  [52] miniUI_0.1.1.1          curl_5.2.1              pkgbuild_1.4.4         
#>  [55] lattice_0.22-6          tibble_3.2.1            withr_3.0.0            
#>  [58] KEGGREST_1.44.0         shiny_1.8.1.1           Rtsne_0.17             
#>  [61] evaluate_0.23           BiocFileCache_2.12.0    urlchecker_1.0.1       
#>  [64] Biostrings_2.72.0       ExperimentHub_2.12.0    filelock_1.0.3         
#>  [67] BiocManager_1.30.22     pillar_1.9.0            lsa_0.73.3             
#>  [70] KernSmooth_2.23-22      generics_0.1.3          BiocVersion_3.19.1     
#>  [73] ggplot2_3.5.1           munsell_0.5.1           scales_1.3.0           
#>  [76] gtools_3.9.5            xtable_1.8-4            glue_1.7.0             
#>  [79] tools_4.4.0             AnnotationHub_3.12.0    locfit_1.5-9.9         
#>  [82] fs_1.6.4                grid_4.4.0              AnnotationDbi_1.66.0   
#>  [85] colorspace_2.1-0        GenomeInfoDbData_1.2.12 cli_3.6.2              
#>  [88] rappdirs_0.3.3          fansi_1.0.6             S4Arrays_1.4.0         
#>  [91] dplyr_1.1.4             gtable_0.3.5            sass_0.4.9             
#>  [94] digest_0.6.35           SparseArray_1.4.0       htmlwidgets_1.6.4      
#>  [97] memoise_2.0.1           htmltools_0.5.8.1       lifecycle_1.0.4        
#> [100] httr_1.4.7              squash_1.0.9            mime_0.12              
#> [103] bit64_4.0.5