params <- list(demo_metadata = TRUE) ## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) root_dir <- knitr::opts_knit$get("root.dir") if (!is.null(root_dir)){ # This hack fixes the relative image paths. # See https://github.com/rstudio/rmarkdown/issues/2473 knitr::opts_knit$set( output.dir = root_dir ) } proj_root <- rprojroot::find_package_root_file() |> normalizePath() # Utility function for figures to force them to have the correct path find_figure <- function(names){ rprojroot::find_package_root_file() |> file.path("man", "figures", names) } METADATA_URL = if (params$demo_metadata) CuratedAtlasQueryR::SAMPLE_DATABASE_URL else CuratedAtlasQueryR::DATABASE_URL ## ----echo=FALSE, out.height = c("139px"), out.width = "120x"------------------ find_figure("logo.png") |> knitr::include_graphics() ## ----echo=FALSE, out.height = c("58px"), out.width = c("155x", "129px", "202px", "219px", "180px")---- c( "svcf_logo.jpeg", "czi_logo.png", "bioconductor_logo.jpg", "vca_logo.png", "nectar_logo.png" ) |> find_figure() |> knitr::include_graphics() ## ----eval=FALSE--------------------------------------------------------------- # devtools::install_github("stemangiola/CuratedAtlasQueryR") ## ----message=FALSE, warning=FALSE--------------------------------------------- library(CuratedAtlasQueryR) ## ----------------------------------------------------------------------------- # Note: in real applications you should use the default value of remote_url metadata <- get_metadata(remote_url = METADATA_URL) metadata ## ----------------------------------------------------------------------------- metadata |> dplyr::distinct(tissue, file_id) ## ----------------------------------------------------------------------------- single_cell_counts = metadata |> dplyr::filter( ethnicity == "African" & stringr::str_like(assay, "%10x%") & tissue == "lung parenchyma" & stringr::str_like(cell_type, "%CD4%") ) |> get_single_cell_experiment() single_cell_counts ## ----------------------------------------------------------------------------- single_cell_counts = metadata |> dplyr::filter( ethnicity == "African" & stringr::str_like(assay, "%10x%") & tissue == "lung parenchyma" & stringr::str_like(cell_type, "%CD4%") ) |> get_single_cell_experiment(assays = "cpm") single_cell_counts ## ----------------------------------------------------------------------------- single_cell_counts = metadata |> dplyr::filter( ethnicity == "African" & stringr::str_like(assay, "%10x%") & tissue == "lung parenchyma" & stringr::str_like(cell_type, "%CD4%") ) |> get_single_cell_experiment(assays = "cpm", features = "PUM1") single_cell_counts ## ----------------------------------------------------------------------------- single_cell_counts_seurat = metadata |> dplyr::filter( ethnicity == "African" & stringr::str_like(assay, "%10x%") & tissue == "lung parenchyma" & stringr::str_like(cell_type, "%CD4%") ) |> get_seurat() single_cell_counts_seurat ## ----------------------------------------------------------------------------- single_cell_counts |> saveRDS("single_cell_counts.rds") ## ----------------------------------------------------------------------------- single_cell_counts |> HDF5Array::saveHDF5SummarizedExperiment("single_cell_counts", replace = TRUE) ## ----results='hide'----------------------------------------------------------- suppressPackageStartupMessages({ library(ggplot2) }) # Plots with styling counts <- metadata |> # Filter and subset dplyr::filter(cell_type_harmonised == "cd14 mono") |> dplyr::filter(file_id_db != "c5a05f23f9784a3be3bfa651198a48eb") |> # Get counts per million for HCA-A gene get_single_cell_experiment(assays = "cpm", features = "HLA-A") |> suppressMessages() |> # Add feature to table tidySingleCellExperiment::join_features("HLA-A", shape = "wide") |> # Rank x axis tibble::as_tibble() # Plot by disease counts |> dplyr::with_groups(disease, ~ .x |> dplyr::mutate(median_count = median(`HLA.A`, rm.na=TRUE))) |> # Plot ggplot(aes(forcats::fct_reorder(disease, median_count,.desc = TRUE), `HLA.A`,color = file_id)) + geom_jitter(shape=".") + # Style guides(color="none") + scale_y_log10() + theme_bw() + theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) + xlab("Disease") + ggtitle("HLA-A in CD14 monocytes by disease") ## ----echo=FALSE--------------------------------------------------------------- find_figure("HLA_A_disease_plot.png") |> knitr::include_graphics() ## ----results='hide'----------------------------------------------------------- # Plot by tissue counts |> dplyr::with_groups(tissue_harmonised, ~ .x |> dplyr::mutate(median_count = median(`HLA.A`, rm.na=TRUE))) |> # Plot ggplot(aes(forcats::fct_reorder(tissue_harmonised, median_count,.desc = TRUE), `HLA.A`,color = file_id)) + geom_jitter(shape=".") + # Style guides(color="none") + scale_y_log10() + theme_bw() + theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) + xlab("Tissue") + ggtitle("HLA-A in CD14 monocytes by tissue") + theme(legend.position = "none") ## ----echo=FALSE--------------------------------------------------------------- find_figure("HLA_A_tissue_plot.png") |> knitr::include_graphics() ## ----------------------------------------------------------------------------- metadata |> # Filter and subset dplyr::filter(cell_type_harmonised=="nk") |> # Get counts per million for HCA-A gene get_single_cell_experiment(assays = "cpm", features = "HLA-A") |> suppressMessages() |> # Plot tidySingleCellExperiment::join_features("HLA-A", shape = "wide") |> ggplot(aes(tissue_harmonised, `HLA.A`, color = file_id)) + theme_bw() + theme( axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1), legend.position = "none" ) + geom_jitter(shape=".") + xlab("Tissue") + ggtitle("HLA-A in nk cells by tissue") ## ----------------------------------------------------------------------------- harmonised <- metadata |> dplyr::filter(tissue == "kidney blood vessel") unharmonised <- get_unharmonised_metadata(harmonised) unharmonised ## ----------------------------------------------------------------------------- dplyr::pull(unharmonised) |> head(2) ## ----------------------------------------------------------------------------- sessionInfo()