## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  comment = "#>",
  collapse = TRUE,
  message = FALSE,
  warning = FALSE
)

## -----------------------------------------------------------------------------
## Attach the packages used below without printing their startup messages.
suppressPackageStartupMessages({
  library(OmicsMLRepoR)
  library(dplyr)
  library(curatedMetagenomicData)
  library(cBioPortalData)
})

## -----------------------------------------------------------------------------
## Fetch the "cMD" metadata table and print it.
cmd <- getMetadata("cMD")
cmd

## -----------------------------------------------------------------------------
## Fetch the "cBioPortal" metadata table and print it.
cbio <- getMetadata("cBioPortal")
cbio

## ----echo=FALSE---------------------------------------------------------------
## Column names of `cmd` containing "_ontology_term_id", with that substring
## removed.
colnames(cmd)[grep("_ontology_term_id", colnames(cmd))] %>%
  gsub("_ontology_term_id", "", .)

## -----------------------------------------------------------------------------
## Exact-match filtering on `sampleMetadata` (presumably the table shipped with
## curatedMetagenomicData -- confirm), shown as row counts.

## Information spread out in two different columns
nrow(sampleMetadata |> filter(study_condition == "CRC"))
nrow(sampleMetadata |> filter(disease == "CRC"))

## Case sensitive
nrow(sampleMetadata |> filter(study_condition == "CRC"))
nrow(sampleMetadata |> filter(study_condition == "crc"))

## Synonyms not covered
nrow(sampleMetadata |> filter(study_condition == "Colorectal Carcinoma"))
nrow(sampleMetadata |> filter(study_condition == "Colorectal Cancer"))

## -----------------------------------------------------------------------------
## Same query term in two capitalizations, this time through tree_filter().
nrow(cmd |> tree_filter(disease, "Colorectal Carcinoma"))
nrow(cmd |> tree_filter(disease, "colorectal carcinoma"))

## -----------------------------------------------------------------------------
## Three different spellings of the query term; compare the row counts.
syn_res1 <- cmd |> tree_filter(disease, "CRC")
syn_res2 <- cmd |> tree_filter(disease, "Colorectal Cancer")
syn_res3 <- cmd |> tree_filter(disease, "Colorectal Carcinoma")

nrow(syn_res1)
nrow(syn_res2)
nrow(syn_res3)

## -----------------------------------------------------------------------------
## Distinct `disease` values retained by each of the three queries.
unique(syn_res1$disease)
unique(syn_res2$disease)
unique(syn_res3$disease)

## -----------------------------------------------------------------------------
## Query with a broader term and list the distinct `disease` values returned.
onto_res <- cmd |> tree_filter(disease, "Intestinal Disorder")
unique(onto_res$disease)

## -----------------------------------------------------------------------------
## Two query terms; the third argument ("OR" / "AND" / "NOT") presumably
## selects how they are combined -- confirm against the tree_filter() docs.
res_or <- cmd %>% tree_filter(disease, c("migraine", "diabetes"), "OR")

## -----------------------------------------------------------------------------
res_and <- cmd %>% tree_filter(disease, c("migraine", "diabetes"), "AND")
res_not <- cmd %>% tree_filter(disease, c("migraine", "diabetes"), "NOT")

## -----------------------------------------------------------------------------
## dplyr::filter() on age first, then tree_filter() with no third argument
## (so whatever default tree_filter() uses for combining terms applies).
res_or_below30 <- cmd %>%
  filter(age_years < 30) %>%
  tree_filter(disease, c("migraine", "diabetes"))

## -----------------------------------------------------------------------------
## Keep only rows with a non-missing `biomarker`, then pass the two selected
## columns to getWideMetaTb() and preview the result.
cmd_biomarker <- cmd %>%
  filter(!is.na(biomarker)) %>%
  select(curation_id, biomarker)
wtb <- getWideMetaTb(cmd_biomarker, "biomarker")
head(wtb)

## -----------------------------------------------------------------------------
## Compare table dimensions before and after getLongMetaTb() on
## `target_condition`.
ltb <- getLongMetaTb(cmd, targetCols = "target_condition")
dim(cmd)
dim(ltb)

## ----debug_needed, echo=FALSE-------------------------------------------------
cmd_sub <- tree_filter(cmd, target_condition, "Alzheimer's disease")

## -----------------------------------------------------------------------------
## Filter the cMD metadata by disease, sex and age group, then hand the
## remaining rows to returnSamples() requesting "relative_abundance".
cmd_dat <- cmd %>%
  tree_filter(col = "disease", "Type 2 Diabetes Mellitus") %>%
  filter(sex == "Female") %>%
  filter(age_group == "Elderly") %>%
  returnSamples("relative_abundance", rownames = "short")

## -----------------------------------------------------------------------------
## getLongMetaTb() is called on `treatment_name` with "<;>" (presumably the
## value delimiter -- confirm), the rows are filtered, and getShortMetaTb()
## is then applied keyed on `curation_id`.
cbio_sub <- cbio %>%
  getLongMetaTb("treatment_name", "<;>") %>%
  filter(treatment_name == "Fluorouracil") %>%
  filter(age_at_diagnosis > 50) %>%
  filter(sex == "Female") %>%
  getShortMetaTb(idCols = "curation_id", targetCols = "treatment_name")

dim(cbio_sub)

## Distinct study IDs among the retained rows.
studies <- unique(cbio_sub$studyId)
studies

## -----------------------------------------------------------------------------
cbio_api <- cBioPortal()

## Preallocate one (NULL) slot per study; every slot is filled by the loop.
resAll <- vector("list", length(studies))

## One cBioPortalData() request per study, restricted to the sample IDs that
## remain in `cbio_sub` for that study.
for (i in seq_along(studies)) {
  study <- studies[i]
  samples <- cbio_sub %>%
    filter(studyId == study) %>%
    pull(sampleId)

  res <- cBioPortalData(
    api = cbio_api,
    by = "hugoGeneSymbol",
    studyId = study,
    sampleIds = samples,
    genePanelId = "IMPACT341"
  )

  resAll[[i]] <- res
}

## -----------------------------------------------------------------------------
sessionInfo()