params <-
list(test = FALSE)

## ----setup, include=FALSE, message=FALSE--------------------------------------
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(BiocStyle)

## ----eval=FALSE---------------------------------------------------------------
#  if (!require("BiocManager", quietly = TRUE)) {
#    install.packages("BiocManager")
#  }
#  
#  BiocManager::install("spicyWorkflow")

## ----load libraries, echo=FALSE, results="hide", warning=FALSE----------------
suppressPackageStartupMessages({
  library(cytomapper)
  library(dplyr)
  library(ggplot2)
  library(simpleSeg)
  library(FuseSOM)
  library(ggpubr)
  library(scater)
  library(spicyR)
  library(ClassifyR)
  library(lisaClust)
  library(tidySingleCellExperiment)
})

## ----eval=FALSE---------------------------------------------------------------
#  library(cytomapper)
#  library(dplyr)
#  library(ggplot2)
#  library(simpleSeg)
#  library(FuseSOM)
#  library(ggpubr)
#  library(scater)
#  library(spicyR)
#  library(ClassifyR)
#  library(lisaClust)
#  library(tidySingleCellExperiment)

## ----set parameters-----------------------------------------------------------
use_mc <- FALSE

if (use_mc) {
  nCores <- max(parallel::detectCores() - 1, 1)
} else {
  nCores <- 2
}
BPPARAM <- simpleSeg:::generateBPParam(nCores)

theme_set(theme_classic())

## ----load images--------------------------------------------------------------
pathToImages <- system.file("extdata/images", package = "spicyWorkflow")

# Store images in a CytoImageList on_disk as h5 files to save memory.
images <- cytomapper::loadImages(
  pathToImages,
  single_channel = TRUE,
  on_disk = TRUE,
  h5FilesPath = HDF5Array::getHDF5DumpDir(),
  BPPARAM = BPPARAM
)
gc()

## ----load clincal data--------------------------------------------------------
# Read in clinical data, manipulate imageID and select columns
clinical <- read.csv(
  system.file(
    "extdata/1-s2.0-S0092867421014860-mmc1.csv",
    package = "spicyWorkflow"
  )
)

clinical <- clinical |>
  mutate(imageID = paste0(
    "Point", PointNumber, "_pt", Patient_ID, "_", TMAD_Patient
  ))

image_idx <- grep("normal", clinical$Tissue_Type)
clinical$imageID[image_idx] <- paste0(clinical$imageID[image_idx], "_Normal")

clinicalVariables <- c(
  "imageID", "Patient_ID", "Status", "Age", "SUBTYPE", "PAM50", "Treatment",
  "DCIS_grade", "Necrosis"
)
rownames(clinical) <- clinical$imageID

## ----add clinical data--------------------------------------------------------
# Add the clinical data to mcols of images.
mcols(images) <- clinical[names(images), clinicalVariables]

## ----segment------------------------------------------------------------------
# Generate segmentation masks
masks <- simpleSeg(
  images,
  nucleus = c("HH3"),
  cellBody = "dilate",
  transform = "sqrt",
  sizeSelection = 40,
  discSize = 2,
  pca = TRUE,
  cores = nCores
)

## ----visualise segmentation---------------------------------------------------
# Visualise segmentation performance one way.
EBImage::display(colorLabels(masks[[1]]))

## -----------------------------------------------------------------------------
# Visualise segmentation performance another way.
cytomapper::plotPixels(
  image = images[1],
  mask = masks[1],
  img_id = "imageID",
  colour_by = c("PanKRT", "GLUT1", "HH3", "CD3", "CD20"),
  display = "single",
  colour = list(
    HH3 = c("black", "blue"),
    CD3 = c("black", "purple"),
    CD20 = c("black", "green"),
    GLUT1 = c("black", "red"),
    PanKRT = c("black", "yellow")
  ),
  bcg = list(
    HH3 = c(0, 1, 1.5),
    CD3 = c(0, 1, 1.5),
    CD20 = c(0, 1, 1.5),
    GLUT1 = c(0, 1, 1.5),
    PanKRT = c(0, 1, 1.5)
  ),
  legend = NULL
)

## -----------------------------------------------------------------------------
# Summarise the expression of each marker in each cell
cells <- cytomapper::measureObjects(
  masks,
  images,
  img_id = "imageID",
  BPPARAM = BPPARAM
)

cells <- cells |> filter(!imageID %in% c('Point6103_pt1008_20624', 'Point6203_pt1107_31568', 'Point6206_pt1197_31571', 'Point6201_pt1027_20597'))

## ----fig.width=5, fig.height=5------------------------------------------------
# Extract marker data and bind with information about images
df <- as.data.frame(cbind(colData(cells), t(assay(cells, "counts"))))

# Plots densities of CK7 for each image.
ggplot(df, aes(x = CK7, colour = imageID)) +
  geom_density() +
  theme(legend.position = "none")

## ----fig.width=5, fig.height=5------------------------------------------------
# Transform and normalise the marker expression of each cell type.
# Use a square root transform, then trimmed the 99 quantile
cells <- normalizeCells(cells,
  transformation = "asinh",
  method = c("trim99", "minMax", "PC1"),
  assayIn = "counts",
  cores = nCores
)

# Extract normalised marker information.
norm_df <- as.data.frame(cbind(colData(cells), t(assay(cells, "norm"))))


# Plots densities of normalised CK7 for each image.
ggplot(norm_df, aes(x = CK7, colour = imageID)) +
  geom_density() +
  theme(legend.position = "none")

## ----FuseSOM------------------------------------------------------------------
# The markers used in the original publication to gate cell types.
useMarkers <- c(
  "PanKRT", "ECAD", "CK7", "VIM", "FAP", "CD31", "CK5", "SMA",
  "CD45", "CD4", "CD3", "CD8", "CD20", "CD68", "CD14", "CD11c",
  "HLADRDPDQ", "MPO", "Tryptase"
)

# Set seed.
set.seed(51773)

# Generate SOM and cluster cells into 20 groups.
cells <- runFuseSOM(
  cells,
  markers = useMarkers,
  assay = "norm",
  numClusters = 24
)

## -----------------------------------------------------------------------------
# Visualise marker expression in each cluster.
scater::plotGroupedHeatmap(
  cells,
  features = useMarkers,
  group = "clusters",
  exprs_values = "norm",
  center = TRUE,
  scale = TRUE,
  zlim = c(-3, 3),
  cluster_rows = FALSE,
  block = "clusters"
)

## -----------------------------------------------------------------------------
# Generate metrics for estimating the number of clusters.
# As I've already run runFuseSOM I don't need to run generateSOM().
cells <- estimateNumCluster(cells, kSeq = 2:30)
optiPlot(cells, method = "gap")

## -----------------------------------------------------------------------------
# Check cluster frequencies.
colData(cells)$clusters |>
  table() |>
  sort()

## -----------------------------------------------------------------------------
set.seed(51773)
# Perform dimension reduction using UMP.
cells <- scater::runUMAP(
  cells,
  subset_row = useMarkers,
  exprs_values = "norm"
)

# Select a subset of images to plot.
someImages <- unique(colData(cells)$imageID)[c(1, 10, 20, 40, 50, 60)]


# UMAP by cell type cluster.
scater::plotReducedDim(
  cells[, colData(cells)$imageID %in% someImages],
  dimred = "UMAP",
  colour_by = "clusters"
)

## -----------------------------------------------------------------------------
# Select cells which belong to individuals with progressor status.
cellsToUse <- cells$Status %in% c("nonprogressor", "progressor")

# Perform simple wicoxon rank sum tests on the columns of the proportion matrix.
testProp <- colTest(cells[, cellsToUse],
  condition = "Status",
  feature = "clusters"
)

testProp

## -----------------------------------------------------------------------------
imagesToUse <- rownames(clinical)[clinical[, "Status"] %in% c("nonprogressor", "progressor")]

prop <- getProp(cells, feature = "clusters")
clusterToUse <- rownames(testProp)[1]

boxplot(prop[imagesToUse, clusterToUse] ~ clinical[imagesToUse, "Status"])

## -----------------------------------------------------------------------------
# Test for changes in pair-wise spatial relationships between cell types.
spicyTest <- spicy(
  cells[, cellsToUse],
  condition = "Status",
  cellType = "clusters",
  imageID = "imageID",
  spatialCoords = c("m.cx", "m.cy"),
  Rs = c(20, 50, 100),
  sigma = 50,
  BPPARAM = BPPARAM
)

topPairs(spicyTest, n = 10)

## -----------------------------------------------------------------------------
# Visualise which relationships are changing the most.
signifPlot(
  spicyTest,
  breaks = c(-1.5, 3, 0.5)
)

## -----------------------------------------------------------------------------
set.seed(51773)

# Cluster cells into spatial regions with similar composition.
cells <- lisaClust(
  cells,
  k = 5,
  Rs = c(20, 50, 100),
  sigma = 50,
  spatialCoords = c("m.cx", "m.cy"),
  cellType = "clusters",
  BPPARAM = BPPARAM
)

## ----fig.height=5, fig.width=5------------------------------------------------
# Visualise the enrichment of each cell type in each region
regionMap(cells, cellType = "clusters", limit = c(0.2, 5))

## -----------------------------------------------------------------------------
# Extract cell information and filter to specific image.
df <- colData(cells) |>
  as.data.frame() |>
  filter(imageID == "Point2206_pt1116_31620")

# Colour cells by their region.
ggplot(df, aes(x = m.cx, y = m.cy, colour = region)) +
  geom_point()

## ----eval = FALSE-------------------------------------------------------------
#  # Use hatching to visualise regions and cell types.
#  hatchingPlot(
#    cells,
#    useImages = "Point2206_pt1116_31620",
#    cellType = "clusters",
#    spatialCoords = c("m.cx", "m.cy")
#  )

## -----------------------------------------------------------------------------
# Use hatching to visualise regions and cell types.
# Relabel the hatching of the regions.
hatchingPlot(
  cells,
  useImages = "Point2206_pt1116_31620",
  cellType = "clusters",
  spatialCoords = c("m.cx", "m.cy"),
  window = "square",
  nbp = 300,
  line.spacing = 41
) +

  scale_region_manual(values = c(
    region_1 = 2,
    region_2 = 1,
    region_3 = 5,
    region_4 = 4,
    region_5 = 3
  )) +

  guides(colour = guide_legend(ncol = 2))

## -----------------------------------------------------------------------------
# Test if the proportion of each region is associated
# with progression status.
testRegion <- colTest(
  cells[, cellsToUse],
  feature = "region",
  condition = "Status"
)

testRegion

## ----message=FALSE, warning=FALSE---------------------------------------------
# Create list to store data.frames
data <- list()

# Add proportions of each cell type in each image
data[["props"]] <- getProp(cells, "clusters")

# Add pair-wise associations
data[["dist"]] <- getPairwise(
  cells,
  spatialCoords = c("m.cx", "m.cy"),
  cellType = "clusters",
  Rs = c(20, 50, 100),
  sigma = 50,
  BPPARAM = BPPARAM
)
data[["dist"]] <- as.data.frame(data[["dist"]])


# Add proportions of each region in each image
# to the list of dataframes.
data[["regions"]] <- getProp(cells, "region")

# Subset data images with progression status and NA clinical variables.
measurements <- lapply(data, function(x) x[imagesToUse, ])

# Set seed
set.seed(51773)

# Perform cross-validation of an elastic net model
# with 100 repeats of 5-fold cross-validation.
cv <- crossValidate(
  measurements = measurements,
  outcome = clinical[imagesToUse, "Status"],
  classifier = "GLM",
  nFolds = 5,
  nRepeats = 100,
  nCores = nCores
)

## -----------------------------------------------------------------------------
# Calculate AUC for each cross-validation repeat and plot.
performancePlot(
  cv,
  metric = "AUC",
  characteristicsList = list(x = "Assay Name")
)

## -----------------------------------------------------------------------------
sessionInfo()