## ----installation, eval = FALSE----------------------------------------------- # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("plasmut") ## ----setup, message=FALSE----------------------------------------------------- library(magrittr) library(tidyverse) library(plasmut) knitr::opts_chunk$set(cache=TRUE) lo <- function(p) log(p/(1-p)) ## ----data_wrangling----------------------------------------------------------- ## sample data p53 <- tibble(y=c(4, 0), n=c(1000, 600), analyte=c("plasma", "buffy coat"), mutation="TP53", sample_id="id1") dat <- p53 %>% unite("uid", c(sample_id, mutation), remove=FALSE) %>% group_by(uid) %>% nest() ## required format for plasmut dat ## ----parameters--------------------------------------------------------------- ## Parameters param.list <- list(ctc=list(a=1, b=10e3), ctdna=list(a=1, b=9), chip=list(a=1, b=9), montecarlo.samples=50e3, prior.weight=0.1) ## ----montecarlo--------------------------------------------------------------- stats <- importance_sampler(dat$data[[1]], param.list) ## Just the mutation-level summary statistics (marginal likelihoods and bayes factors) importance_sampler(dat$data[[1]], param.list, save_montecarlo=FALSE) ## ----prior.weight, cache=TRUE------------------------------------------------- fun <- function(montecarlo.samples, data, param.list, prior.weight=0.1){ param.list$montecarlo.samples <- montecarlo.samples param.list$prior.weight <- prior.weight res <- importance_sampler(data, param.list, save_montecarlo=FALSE) %>% mutate(montecarlo.samples=montecarlo.samples, prior.weight=param.list$prior.weight) res } fun2 <- function(montecarlo.samples, data, param.list, prior.weight=0.1, nreps=100){ res <- replicate(nreps, fun(montecarlo.samples, data, param.list, prior.weight=prior.weight), simplify=FALSE) %>% do.call(bind_rows, .) %>% group_by(montecarlo.samples, prior.weight) %>% summarize(mean_bf=mean(bayesfactor), sd_bf=sd(bayesfactor), .groups="drop") res } S <- c(100, 1000, seq(10e3, 50e3, by=10000)) results <- S %>% map_dfr(fun2, data=dat$data[[1]], param.list=param.list) results2 <- S %>% map_dfr(fun2, data=dat$data[[1]], param.list=param.list, prior.weight=1) combined <- bind_rows(results, results2) ## ----standardev, fig.width=8, fig.height=5------------------------------------ combined %>% mutate(prior.weight=factor(prior.weight)) %>% ggplot(aes(montecarlo.samples, sd_bf)) + geom_point(aes(color=prior.weight)) + geom_line(aes(group=prior.weight, color=prior.weight)) + scale_y_log10() + theme_bw(base_size=16) + xlab("Monte Carlo samples") + ylab("Standard deviation of\n log Bayes Factor") ## ----means, fig.width=8, fig.height=5----------------------------------------- combined %>% mutate(prior.weight=factor(prior.weight)) %>% filter(montecarlo.samples > 100) %>% ggplot(aes(prior.weight, mean_bf)) + geom_point() + theme_bw(base_size=16) + ylab("Mean log Bayes factor") + xlab("Prior weight") ## ----vanterve----------------------------------------------------------------- data(crcseq, package="plasmut") crcseq %>% select(-position) ## ----vanterve-importance-sampling--------------------------------------------- params <- list(ctdna = list(a = 1, b = 9), ctc = list(a = 1, b = 10^3), chip = list(a= 1, b = 9), montecarlo.samples = 50e3, prior.weight = 0.1) muts <- unite(crcseq, "uid", c(patient, gene), remove = FALSE) %>% group_by(uid) %>% nest() #Each element of the data column contains a table with the variant and total allele counts in plasma and buffy coat. #Run the importance sampler muts$IS <- muts$data %>% map(importance_sampler, params) fun <- function(x){ result <- x$data %>% select(-position) %>% mutate(bayes_factor = x$IS$bayesfactor$bayesfactor) return(result) } bf <- apply(muts, 1, fun) bf %>% do.call(rbind, .) %>% as_tibble() %>% select(patient, gene, aa, bayes_factor) %>% rename(log_bf=bayes_factor) %>% distinct() ## ----session------------------------------------------------------------------ sessionInfo()