## ----lib, message=FALSE----------------------------------------------------
library(CNPBayes)
library(SummarizedExperiment)
library(ggplot2)
library(tidyverse)

## ----find_cnps-------------------------------------------------------------
path <- system.file("extdata", package="CNPBayes")
se <- readRDS(file.path(path, "simulated_se.rds"))
grl <- readRDS(file.path(path, "grl_deletions.rds"))

## ----summary, message=FALSE------------------------------------------------
##cnv.region <- consensusCNP(grl, max.width=5e6)
cnv.region <- readRDS(file.path(path, "cnv_region.rds"))
i <- subjectHits(findOverlaps(cnv.region, rowRanges(se)))
med.summary <- matrixStats::colMedians(assays(se)[["cn"]][i, ], na.rm=TRUE)

## ----model_construction----------------------------------------------------
mp <- McmcParams(nStarts=5, burnin=500, iter=1000, thin=1)

## ----gibbs-----------------------------------------------------------------
model.list <- gibbs(model="SB", dat=med.summary, k_range=c(2, 3), mp=mp,
                    max_burnin=400, top=2)

## ----ggfunctions-----------------------------------------------------------
model <- model.list[[1]]
model
chains <- ggChains(model)
chains[[1]]
chains[[2]]

## ----posteriorPredictive---------------------------------------------------
## only one batch
full.data <- tibble(y=med.summary,
                    batch=paste("Batch 1"))
predictive <- posteriorPredictive(model) %>%
  mutate(batch=paste("Batch", batch),
         component=factor(component))
predictive$n <- 4000
predictive$component <- factor(predictive$component)
ggplot(predictive, aes(x=y, n_facet=n,
                       y=..count../n_facet,
                       fill=component)) +
  geom_histogram(data=full.data, aes(y, ..density..),
                 bins=400,
                 inherit.aes=FALSE,
                 color="gray70",
                 fill="gray70",
                 alpha=0.1) +
  geom_density(adjust=1, alpha=0.4, size=0.75, color="gray30") +
  ## show marginal density
  theme(panel.background=element_rect(fill="white"),
        axis.line=element_line(color="black"),
        legend.position="bottom",
        legend.direction="horizontal") +
  scale_y_sqrt() +
  xlab("average copy number") +
  ylab("density") +
  guides(color=FALSE)

## ----cn_model--------------------------------------------------------------
cn.model <- CopyNumberModel(model)
ggMixture(cn.model)

## ----probCopyNumber--------------------------------------------------------
probs <- probCopyNumber(cn.model)
head(probs)

## ----downsample------------------------------------------------------------
mb <- MultiBatchModelExample
full.data <- tibble(medians=y(mb),
                    plate=batch(mb),
                    batch_orig=as.character(batch(mb)))
partial.data <- downSample(full.data,
                           size=1000,
                           min.batchsize=50)
## confusingly, the downsampled data uses a different indexing for batch
plate.mapping <- partial.data %>%
  select(c(plate, batch_index)) %>%
  group_by(plate) %>%
  summarize(batch_index=unique(batch_index))

## ----fit_downsample--------------------------------------------------------
model <- gibbs(model="MB", k_range=c(3, 3),
               dat=partial.data$medians,
               batches=partial.data$batch_index,
               mp=mp)[[1]]

## ----upSample--------------------------------------------------------------
full.data2 <- left_join(full.data, plate.mapping, by="plate")
model.up <- upSample2(full.data2, model)
model.up
nrow(probz(model.up))
head(round(probz(model.up)), 2)

## ----copy-number-model-----------------------------------------------------
cn.model <- CopyNumberModel(model.up)
mapping(cn.model)
round(head(probz(cn.model)), 2)
round(head(probCopyNumber(cn.model)), 2)