## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    crop = NULL ## Related to https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html
)

## ----scp_framework, results='markup', fig.cap="`scp` relies on `SingleCellExperiment` and `QFeatures` objects.", echo=FALSE, out.width='100%', fig.align='center'----
knitr::include_graphics("./figures/SCP_framework.png", error = FALSE)

## ----load_scp, message = FALSE------------------------------------------------
library("scp")

## ----load_other_package, message = FALSE--------------------------------------
library("ggplot2")
library("magrittr")
library("dplyr")

## ----load_mqScpData-----------------------------------------------------------
data("mqScpData")
dim(mqScpData)
mqScpData[1:10, 1:5]

## ----load_sampleAnnotation----------------------------------------------------
data("sampleAnnotation")
sampleAnnotation

## ----readSCP------------------------------------------------------------------
scp <- readSCP(quantTable = mqScpData,
               metaTable = sampleAnnotation,
               channelCol = "Channel",
               batchCol = "Set")

## ----overview-----------------------------------------------------------------
scp

## ----zeroIsNA-----------------------------------------------------------------
scp <- zeroIsNA(scp, i = 1:4)

## ----rowDataNames-------------------------------------------------------------
rowDataNames(scp)

## ----filter_psms--------------------------------------------------------------
scp <- filterFeatures(scp,
                      ~ Reverse != "+" &
                        !grepl("REV|CON", protein) &
                        Potential.contaminant != "+" &
                        !is.na(PIF) & PIF > 0.8)

## ----dims---------------------------------------------------------------------
dims(scp)

## ----filter_assays------------------------------------------------------------
keepAssay <- dims(scp)[1, ] > 150
scp <- scp[, , keepAssay]
scp

## ----show_annotation----------------------------------------------------------
colData(scp)[, "SampleType"] %>%
  table

## ----computeSCR---------------------------------------------------------------
scp <- computeSCR(scp,
                  i = 1:3,
                  colDataCol = "SampleType",
                  carrierPattern = "Carrier",
                  samplePattern = "Macrophage|Monocyte")

## ----plot_SCR, warning=FALSE, message=FALSE-----------------------------------
scp %>%
  rowDataToDF(i = 1:3,
              vars = ".meanSCR") %>%
  data.frame %>%
  ggplot(aes(x = .meanSCR)) +
  geom_histogram() +
  geom_vline(xintercept = 0.1)

## ----filter_SCR---------------------------------------------------------------
scp <- filterFeatures(scp,
                      ~ !is.na(.meanSCR) &
                        .meanSCR < 0.1)

## ----computeFDR---------------------------------------------------------------
scp <- computeFDR(scp,
                  i = 1:3,
                  groupCol = "peptide",
                  pepCol = "dart_PEP")

## ----filter_FDR---------------------------------------------------------------
scp <- filterFeatures(scp,
                      ~ .FDR < 0.01)

## ----divideByReference--------------------------------------------------------
scp <- divideByReference(scp,
                         i = 1:3,
                         colDataCol = "SampleType",
                         samplePattern = ".",
                         refPattern = "Reference")

## ----features_aggregation, results = 'markup', fig.cap = "Conceptual illustration of feature aggregation.", echo=FALSE, out.width='100%', fig.align='center'----
knitr::include_graphics("./figures/feature_aggregation.png", error = FALSE)

## ----show_agg_psms------------------------------------------------------------
scp

## ----aggregate_peptides, message = FALSE--------------------------------------
scp <- aggregateFeaturesOverAssays(scp,
                                   i = 1:3,
                                   fcol = "peptide",
                                   name = paste0("peptides_", names(scp)),
                                   fun = matrixStats::colMedians, na.rm = TRUE)

## ----show_agg_peptides--------------------------------------------------------
scp

## ----joinAssays---------------------------------------------------------------
scp <- joinAssays(scp,
                  i = 4:6,
                  name = "peptides")

## ----show_join----------------------------------------------------------------
scp

## ----transferColDataToAssay---------------------------------------------------
colData(scp[["peptides"]])
scp <- transferColDataToAssay(scp, "peptides")
colData(scp[["peptides"]])

## ----subset_single_cells------------------------------------------------------
sce <- scp[["peptides"]]
sce <- sce[, sce$SampleType %in% c("Blank", "Macrophage", "Monocyte")]

## ----addAssay_filter1---------------------------------------------------------
scp %>%
  addAssay(y = sce,
           name = "peptides_filter1") %>%
  addAssayLinkOneToOne(from = "peptides",
                       to = "peptides_filter1") ->
  scp

## ----show_cell_filter---------------------------------------------------------
scp

## ----compute_colMedians-------------------------------------------------------
scp[["peptides_filter1"]] %>%
  assay %>%
  matrixStats::colMedians(na.rm = TRUE) ->
  scp[["peptides_filter1"]]$MedianRI

## ----plot_medianRI, warning=FALSE, message=FALSE------------------------------
scp[["peptides_filter1"]] %>%
  colData %>%
  data.frame %>%
  ggplot(aes(x = MedianRI, fill = SampleType)) +
  geom_boxplot() +
  scale_x_log10()

## ----computeMedianCV----------------------------------------------------------
scp <- computeMedianCV(scp,
                       i = "peptides_filter1",
                       proteinCol = "protein",
                       peptideCol = "peptide",
                       batchCol = "Set")

## ----plot_medianCV, message = FALSE, warning = FALSE--------------------------
scp[["peptides_filter1"]] %>%
  colData %>%
  data.frame %>%
  ggplot(aes(x = .MedianCV,
             fill = SampleType)) +
  geom_boxplot() +
  geom_vline(xintercept = 0.3)

## ----create_filter2-----------------------------------------------------------
keepSample <- scp[["peptides_filter1"]]$.MedianCV < 0.3 &
  scp[["peptides_filter1"]]$SampleType %in% c("Macrophage", "Monocyte")
keepSample[is.na(keepSample)] <- FALSE

## ----apply_filter2------------------------------------------------------------
sce <- scp[["peptides_filter1"]]
sce <- sce[, keepSample]

## ----add_filter2--------------------------------------------------------------
addAssay(scp,
         y = sce,
         name = "peptides_filter2") %>%
  addAssayLinkOneToOne(from = "peptides_filter1",
                       to = "peptides_filter2") ->
  scp

## ----normalize_scale----------------------------------------------------------
scp %>%
    ## Divide columns by median
    sweep(i = "peptides_filter2",
        MARGIN = 2,
        FUN = "/",
        STATS = colMedians(assay(scp[["peptides_filter2"]]),
                           na.rm = TRUE),
        name = "peptides_norm_col") %>%
    ## Divide rows by mean
    sweep(i = "peptides_norm_col",
        MARGIN = 1,
        FUN = "/",
        STATS = rowMeans(assay(.[["peptides_norm_col"]]),
                         na.rm = TRUE),
        name = "peptides_norm") ->
    scp

## ----show_sweep---------------------------------------------------------------
scp

## ----filterNA-----------------------------------------------------------------
scp <- filterNA(scp,
                i = "peptides_norm",
                pNA = 0.99)

## ----logTransform-------------------------------------------------------------
scp <- logTransform(scp,
                    base = 2,
                    i = "peptides_norm",
                    name = "peptides_log")

## ----aggregate_proteins-------------------------------------------------------
scp <- aggregateFeatures(scp,
                         i = "peptides_log",
                         name = "proteins",
                         fcol = "protein",
                         fun = matrixStats::colMedians, na.rm = TRUE)

## ----show_agg_proteins--------------------------------------------------------
scp

## ----normalize_center---------------------------------------------------------
scp %>%
    ## Center columns with median
    sweep(i = "proteins",
          MARGIN = 2,
          FUN = "-",
          STATS = colMedians(assay(scp[["proteins"]]),
                             na.rm = TRUE),
          name = "proteins_norm_col") %>%
    ## Center rows with mean
    sweep(i = "proteins_norm_col",
          MARGIN = 1,
          FUN = "-",
          STATS = rowMeans(assay(.[["proteins_norm_col"]]),
                           na.rm = TRUE),
          name = "proteins_norm") ->
    scp

## ----missingness--------------------------------------------------------------
scp[["proteins_norm"]] %>%
  assay %>%
  is.na %>%
  mean

## ----impute-------------------------------------------------------------------
scp <- impute(scp,
              i = "proteins_norm",
              method = "knn",
              k = 3, rowmax = 1, colmax= 1,
              maxp = Inf, rng.seed = 1234)

## ----missingness_imputed------------------------------------------------------
scp[["proteins_norm"]] %>%
  assay %>%
  is.na %>%
  mean

## ----transferColDataToAssay_proteins------------------------------------------
scp <- transferColDataToAssay(scp, i = "proteins_norm")
sce <- scp[["proteins_norm"]]

## ----prepare_batch_correction-------------------------------------------------
batch <- colData(sce)$Set
model <- model.matrix(~ SampleType, data = colData(sce))

## ----compute_batch_correction, results = 'hide', message = FALSE--------------
library(sva)
assay(sce) <- ComBat(dat = assay(sce),
                     batch = batch,
                     mod = model)

## ----add_batch_correction-----------------------------------------------------
addAssay(scp,
         y = sce,
         name = "proteins_batchC") %>%
  addAssayLinkOneToOne(from = "proteins_norm",
                       to = "proteins_batchC") ->
  scp

## ----load_scater--------------------------------------------------------------
library(scater)

## ----run_PCA------------------------------------------------------------------
scp[["proteins_batchC"]] <- runPCA(scp[["proteins_batchC"]],
                                   ncomponents = 5,
                                   ntop = Inf,
                                   scale = TRUE,
                                   exprs_values = 1,
                                   name = "PCA")

## ----plot_PCA-----------------------------------------------------------------
plotReducedDim(scp[["proteins_batchC"]],
               dimred = "PCA",
               colour_by = "SampleType",
               point_alpha = 1)

## ----run_UMAP-----------------------------------------------------------------
scp[["proteins_batchC"]] <- runUMAP(scp[["proteins_batchC"]],
                                    ncomponents = 2,
                                    ntop = Inf,
                                    scale = TRUE,
                                    exprs_values = 1,
                                    n_neighbors = 3,
                                    dimred = "PCA",
                                    name = "UMAP")

## ----plot_UMAP----------------------------------------------------------------
plotReducedDim(scp[["proteins_batchC"]],
               dimred = "UMAP",
               colour_by = "SampleType",
               point_alpha = 1)

## ----monitor_plot, warning = FALSE, fig.width = 10, fig.height = 10, out.width = "100%"----
scp %>%
  ## Get the features related to LMNA (P02545)
  subsetByFeature("P02545") %>%
  ## Format the `QFeatures` to a long format table
  longFormat(colDataCols = c("Set", "SampleType", "Channel")) %>%
  data.frame %>%
  ## This is used to preserve ordering of the samples and assays in ggplot2
  mutate(assay = factor(assay, levels = names(scp)),
         Channel = sub("RI", "TMT-", Channel),
         Channel = factor(Channel, levels = unique(Channel))) %>%
  ## Start plotting
  ggplot(aes(x = Channel, y = value, group = rowname, col = SampleType)) +
  geom_point() +
  ## Plot every assay in a separate facet
  facet_wrap(facets = vars(assay), scales = "free_y", ncol = 3) +
  ## Annotate plot
  xlab("Channels") +
  ylab("Intensity (arbitrary units)") +
  ## Improve plot aspect
  theme(axis.text.x = element_text(angle = 90),
        strip.text = element_text(hjust = 0),
        legend.position = "bottom")

## ----setup2, include = FALSE--------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "",
    crop = NULL
)

## ----sessioninfo, echo=FALSE--------------------------------------------------
sessionInfo()