## Code extracted from a vignette: each `## ----` line below is a chunk
## header, and chunks marked `eval = FALSE` are kept as commented-out code.

## ----eval = FALSE-------------------------------------------------------------
# if (!"BiocManager" %in% rownames(installed.packages()))
#   install.packages("BiocManager", repos = "https://CRAN.R-project.org")
# BiocManager::install("cellxgenedp")

## ----eval = FALSE-------------------------------------------------------------
# if (!"remotes" %in% rownames(installed.packages()))
#   install.packages("remotes", repos = "https://CRAN.R-project.org")
# remotes::install_github("mtmorgan/cellxgenedp")

## ----eval = FALSE-------------------------------------------------------------
# pkgs <- c("tidyr", "zellkonverter", "SingleCellExperiment", "HDF5Array")
# required_pkgs <- pkgs[!pkgs %in% rownames(installed.packages())]
# BiocManager::install(required_pkgs)

## ----message = FALSE----------------------------------------------------------
library(zellkonverter)
library(SingleCellExperiment) # load early to avoid masking dplyr::count()
library(dplyr)
library(cellxgenedp)

## ----eval = FALSE-------------------------------------------------------------
# cxg()

## -----------------------------------------------------------------------------
# The variable deliberately reuses the name of the constructor; every later
# chunk passes this object to the accessor functions.
db <- db()

## -----------------------------------------------------------------------------
db

## -----------------------------------------------------------------------------
collections(db)

datasets(db)

files(db)

## -----------------------------------------------------------------------------
# Count datasets per collection, largest first, and keep only the top row.
collection_with_most_datasets <-
  datasets(db) |>
  count(collection_id, sort = TRUE) |>
  slice(1)

## -----------------------------------------------------------------------------
# Collection-level record for that collection.
left_join(
  collection_with_most_datasets |> select(collection_id),
  collections(db),
  by = "collection_id"
) |>
  glimpse()

## -----------------------------------------------------------------------------
# Dataset-level records belonging to that collection.
left_join(
  collection_with_most_datasets |> select(collection_id),
  datasets(db),
  by = "collection_id"
)

## -----------------------------------------------------------------------------
# Show only the list-columns of the datasets table.
datasets(db) |>
  select(where(is.list))

## ----facets-------------------------------------------------------------------
facets(db, "assay")
facets(db, "self_reported_ethnicity")
facets(db, "sex")

## ----african_american_female--------------------------------------------------
# Keep datasets matching all three facet conditions (filter() combines its
# arguments with a logical AND).
african_american_female <-
  datasets(db) |>
  filter(
    facets_filter(assay, "ontology_term_id", "EFO:0009922"),
    facets_filter(self_reported_ethnicity, "label", "African American"),
    facets_filter(sex, "label", "female")
  )

## -----------------------------------------------------------------------------
african_american_female |>
  summarise(total_cell_count = sum(cell_count))

## -----------------------------------------------------------------------------
## collections
left_join(
  african_american_female |> select(collection_id) |> distinct(),
  collections(db),
  by = "collection_id"
)

## -----------------------------------------------------------------------------
# The title is split over two strings only to keep the line short; paste()
# rejoins the pieces with a single space.
title_of_interest <- paste(
  "A single-cell atlas of the healthy breast tissues reveals clinically",
  "relevant clusters of breast epithelial cells"
)
collection_of_interest <-
  collections(db) |>
  dplyr::filter(startsWith(name, title_of_interest))

collection_of_interest |>
  glimpse()

## -----------------------------------------------------------------------------
collection_id_of_interest <- pull(collection_of_interest, "collection_id")

# `%in%` rather than `==`: the prefix match above may return zero or several
# collections, and `==` against a vector whose length is not 1 would either
# fail or recycle incorrectly.
publisher_metadata(db) |>
  filter(collection_id %in% collection_id_of_interest) |>
  glimpse()

authors(db) |>
  filter(collection_id %in% collection_id_of_interest)

## -----------------------------------------------------------------------------
external_links <- links(db)
external_links

external_links |>
  count(link_type)

external_links |>
  filter(collection_id %in% collection_id_of_interest)

## -----------------------------------------------------------------------------
# Reverse lookup: from a link URL to the collection(s) that reference it.
doi_of_interest <- "https://doi.org/10.1016/j.stem.2018.12.011"
links(db) |>
  filter(link_url == doi_of_interest) |>
  left_join(collections(db), by = "collection_id") |>
  glimpse()

## ----eval = FALSE-------------------------------------------------------------
# african_american_female |>
#   ## use criteria to identify a single dataset (here just the
#   ## 'first' dataset), then visualize
#   slice(1) |>
#   datasets_visualize()

## ----selected_files-----------------------------------------------------------
# File records for every dataset selected above.
selected_files <- left_join(
  african_american_female |> select(dataset_id),
  files(db),
  by = "dataset_id"
)

## ----local_file_from_dataset_id-----------------------------------------------
# Narrow to the H5AD file of one specific dataset and download it;
# dry.run = FALSE requests the actual download.
local_file <-
  selected_files |>
  filter(
    dataset_id == "de985818-285f-4f59-9dbd-d74968fddba3",
    filetype == "H5AD"
  ) |>
  files_download(dry.run = FALSE)

basename(local_file)

## ----readH5AD-----------------------------------------------------------------
h5ad <- readH5AD(local_file, use_hdf5 = TRUE, reader = "R")
h5ad

## -----------------------------------------------------------------------------
# The pipe already supplies `h5ad` as the first argument of colData(); do not
# pass it a second time.
h5ad |>
  colData() |>
  as_tibble() |>
  count(sex, donor_id)

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()