## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = FALSE
)

## ----setup, message=FALSE, warning=FALSE--------------------------------------
library(miloR)
library(SingleCellExperiment)
library(scater)
library(scran)
library(dplyr)
library(patchwork)
library(scRNAseq)
library(scuttle)
library(irlba)
library(BiocParallel)
library(ggplot2)
library(sparseMatrixStats)

## -----------------------------------------------------------------------------
# uncomment the row below to allow multi-processing and comment out the SerialParam line.
# bpparam <- MulticoreParam(workers=4)
bpparam <- SerialParam()
register(bpparam)

pbmc.sce <- KotliarovPBMCData(mode="rna", ensembl=TRUE) # download the PBMC data from Kotliarov _et al._

# downsample cells to reduce compute time
prop.cells <- 0.75
n.cells <- floor(ncol(pbmc.sce) * prop.cells)
set.seed(42)
keep.cells <- sample(colnames(pbmc.sce), size=n.cells)
pbmc.sce <- pbmc.sce[, colnames(pbmc.sce) %in% keep.cells]


# downsample the number of samples
colData(pbmc.sce)$ObsID <- paste(colData(pbmc.sce)$tenx_lane, colData(pbmc.sce)$sampleid, sep="_")
n.samps <- 80
keep.samps <- sample(unique(colData(pbmc.sce)$ObsID), size=n.samps)
keep.cells <- rownames(colData(pbmc.sce))[colData(pbmc.sce)$ObsID %in% keep.samps]
pbmc.sce <- pbmc.sce[, colnames(pbmc.sce) %in% keep.cells]

pbmc.sce

## -----------------------------------------------------------------------------
set.seed(42)
# remove sparser cells
drop.cells <- colSums(counts(pbmc.sce)) < 1000
pbmc.sce <- computePooledFactors(pbmc.sce[, !drop.cells], BPPARAM=bpparam)
pbmc.sce <- logNormCounts(pbmc.sce)

pbmc.hvg <- modelGeneVar(pbmc.sce)
pbmc.hvg$FDR[is.na(pbmc.hvg$FDR)] <- 1

pbmc.sce <- runPCA(pbmc.sce, subset_row=rownames(pbmc.sce)[pbmc.hvg$FDR < 0.1], scale=TRUE, ncomponents=50, assay.type="logcounts", name="PCA", BPPARAM=bpparam)
pbmc.sce

## ----fig.height=4.1, fig.width=10.5-------------------------------------------
set.seed(42)
pbmc.sce <- runUMAP(pbmc.sce, n_neighbors=30, pca=50, BPPARAM=bpparam) # add a UMAP for plotting results later

pbmc.milo <- Milo(pbmc.sce) # from the SCE object
reducedDim(pbmc.milo, "UMAP") <- reducedDim(pbmc.sce, "UMAP")

plotUMAP(pbmc.milo, colour_by="adjmfc.time") + plotUMAP(pbmc.milo, colour_by="sampleid")

## -----------------------------------------------------------------------------
set.seed(42)
# we build KNN graph
pbmc.milo <- buildGraph(pbmc.milo, k = 60, d = 30)
pbmc.milo <- makeNhoods(pbmc.milo, prop = 0.05, k = 60, d=30, refined = TRUE, refinement_scheme="graph") # make nhoods using graph-only as this is faster
colData(pbmc.milo)$ObsID <- paste(colData(pbmc.milo)$tenx_lane, colData(pbmc.milo)$sampleid, sep="_")
pbmc.milo <- countCells(pbmc.milo, meta.data = data.frame(colData(pbmc.milo)), samples="ObsID") # make the nhood X sample counts matrix
pbmc.milo

## -----------------------------------------------------------------------------
plotNhoodSizeHist(pbmc.milo)

## -----------------------------------------------------------------------------
pbmc.design <- data.frame(colData(pbmc.milo))[,c("tenx_lane", "adjmfc.time", "sample", "sampleid", "timepoint", "ObsID")]
pbmc.design <- distinct(pbmc.design)
rownames(pbmc.design) <- pbmc.design$ObsID
## Reorder rownames to match columns of nhoodCounts(milo)
pbmc.design <- pbmc.design[colnames(nhoodCounts(pbmc.milo)), , drop=FALSE]
table(pbmc.design$adjmfc.time)

## ----warning=FALSE------------------------------------------------------------
set.seed(42)
rownames(pbmc.design) <- pbmc.design$ObsID

da_results <- testNhoods(pbmc.milo, design = ~ adjmfc.time + (1|tenx_lane), design.df = pbmc.design, fdr.weighting="graph-overlap",
                         glmm.solver="Fisher", REML=TRUE, norm.method="TMM", BPPARAM = bpparam, fail.on.error=FALSE)
table(da_results$SpatialFDR < 0.1)

## -----------------------------------------------------------------------------
which(is.na(da_results$logFC))

## -----------------------------------------------------------------------------
which(checkSeparation(pbmc.milo, design.df=pbmc.design, condition="adjmfc.time", min.val=10))

## -----------------------------------------------------------------------------
plotNhoodCounts(pbmc.milo, which(checkSeparation(pbmc.milo, design.df=pbmc.design, condition="adjmfc.time", min.val=10)), 
                design.df=pbmc.design, condition="adjmfc.time")

## -----------------------------------------------------------------------------
head(da_results[!is.na(da_results$logFC), ])

## -----------------------------------------------------------------------------
ggplot(da_results, aes(x=Converged, y=`tenx_lane_variance`)) +
    geom_boxplot() + 
    scale_y_log10() +
    NULL

## ----fig.width=10, fig.height=4.5---------------------------------------------
pbmc.milo <- buildNhoodGraph(pbmc.milo, overlap=25)

# we need to subset the plotting results as it can't handle the NAs internally.
plotUMAP(pbmc.milo, colour_by="adjmfc.time") + plotNhoodGraphDA(pbmc.milo, da_results,
                                                                alpha=0.1) +
  plot_layout(guides="auto" )

## -----------------------------------------------------------------------------
set.seed(42)
# we need to use place the test variable at the end of the formula
glm_results <- testNhoods(pbmc.milo, design = ~ tenx_lane + adjmfc.time, design.df = pbmc.design, fdr.weighting="graph-overlap",
                          REML=TRUE, norm.method="TMM", BPPARAM = bpparam)
table(glm_results$SpatialFDR < 0.1)

## ----fig.width=10, fig.height=4.5---------------------------------------------
plotUMAP(pbmc.milo, colour_by="adjmfc.time") + plotNhoodGraphDA(pbmc.milo, glm_results, alpha=0.1) +
  plot_layout(guides="auto" )

## -----------------------------------------------------------------------------
comp.da <- merge(da_results, glm_results, by='Nhood')
comp.da$Sig <- "none"
comp.da$Sig[comp.da$SpatialFDR.x < 0.1 & comp.da$SpatialFDR.y < 0.1] <- "Both"
comp.da$Sig[comp.da$SpatialFDR.x >= 0.1 & comp.da$SpatialFDR.y < 0.1] <- "GLM"
comp.da$Sig[comp.da$SpatialFDR.x < 0.1 & comp.da$SpatialFDR.y >= 0.1] <- "GLMM"

ggplot(comp.da, aes(x=logFC.x, y=logFC.y)) +
    geom_point(data=comp.da[, c("logFC.x", "logFC.y")], aes(x=logFC.x, y=logFC.y),
               colour='grey80', size=1) +
    geom_point(aes(colour=Sig)) +
    labs(x="GLMM LFC", y="GLM LFC") +
    facet_wrap(~Sig) +
    NULL

## -----------------------------------------------------------------------------
sessionInfo()