## ----biocstyle, echo = FALSE, results = "asis"-----------------------------
BiocStyle::markdown() 

## ----init, message = FALSE, echo = FALSE, results = "hide"-----------------
## Silently loading all packages
library(BiocStyle)
library(xcms)
library(faahKO)
library(pander)
## Use socket based parallel processing on Windows systems
## if (.Platform$OS.type == "unix") {
##     register(bpstart(MulticoreParam(2)))
## } else {
##     register(bpstart(SnowParam(2)))
## }
register(SerialParam()) 

## ----load-libs-pheno, message = FALSE--------------------------------------
library(xcms)
library(faahKO)
library(RColorBrewer)
library(pander)
library(magrittr)

## Get the full path to the CDF files
cdfs <- dir(system.file("cdf", package = "faahKO"), full.names = TRUE,
	    recursive = TRUE)
## Create a phenodata data.frame
pd <- data.frame(sample_name = sub(basename(cdfs), pattern = ".CDF",
				   replacement = "", fixed = TRUE),
		 sample_group = c(rep("KO", 6), rep("WT", 6)),
		 stringsAsFactors = FALSE) 

## ----load-with-msnbase, message = FALSE------------------------------------
raw_data <- readMSData(files = cdfs, pdata = new("NAnnotatedDataFrame", pd),
		       mode = "onDisk") 

## ----data-inspection-rtime, message = FALSE--------------------------------
head(rtime(raw_data)) 

## ----data-inspection-mz, message = FALSE-----------------------------------
mzs <- mz(raw_data)

## Split the list by file
mzs_by_file <- split(mzs, f = fromFile(raw_data))

length(mzs_by_file) 

## ----data-inspection-bpc, message = FALSE, fig.align = "center", fig.width = 12, fig.height = 6----
## Get the base peak chromatograms. This reads data from the files.
bpis <- chromatogram(raw_data, aggregationFun = "max")
## Define colors for the two groups
group_colors <- brewer.pal(3, "Set1")[1:2]
names(group_colors) <- c("KO", "WT")

## Plot all chromatograms.
plot(bpis, col = group_colors[raw_data$sample_group])
 

## ----data-inspection-chromatogram, message = FALSE-------------------------
bpi_1 <- bpis[1, 1]
head(rtime(bpi_1))
head(intensity(bpi_1)) 

## ----data-inspection-tic-boxplot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 4, fig.cap = "Distribution of total ion currents per file."----
## Get the total ion current by file
tc <- split(tic(raw_data), f = fromFile(raw_data))
boxplot(tc, col = group_colors[raw_data$sample_group],
	ylab = "intensity", main = "Total ion current") 

## ----peak-detection-plot-eic, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 5, fig.cap = "Extracted ion chromatogram for one peak."----
## Define the rt and m/z range of the peak area
rtr <- c(2700, 2900)
mzr <- c(334.9, 335.1)
## extract the chromatogram
chr_raw <- chromatogram(raw_data, mz = mzr, rt = rtr)
plot(chr_raw, col = group_colors[chr_raw$sample_group]) 

## ----peak-detection-plot-ms-data, message = FALSE, warning = FALSE, fig.aligh = "center", fig.width = 14, fig.height = 14, fig.cap = "Visualization of the raw MS data for one peak. For each plot: upper panel: chromatogram plotting the intensity values against the retention time, lower panel m/z against retention time plot. The individual data points are colored according to the intensity."----
raw_data %>%
    filterRt(rt = rtr) %>%
    filterMz(mz = mzr) %>%
    plot(type = "XIC") 

## ----peak-detection-centwave, message = FALSE, results = "hide"------------
cwp <- CentWaveParam(peakwidth = c(30, 80), noise = 1000)
xdata <- findChromPeaks(raw_data, param = cwp) 

## ----peak-detection-chromPeaks, message = FALSE----------------------------
head(chromPeaks(xdata)) 

## ----peak-detection-peaks-per-sample, message = FALSE, results = "asis"----
summary_fun <- function(z) {
    c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"]))
}
T <- lapply(split.data.frame(chromPeaks(xdata),
			     f = chromPeaks(xdata)[, "sample"]),
	    FUN = summary_fun)
T <- do.call(rbind, T)
rownames(T) <- basename(fileNames(xdata))
pandoc.table(T,
	     caption = paste0("Summary statistics on identified chromatographic",
			      " peaks. Shown are number of identified peaks per",
			      " sample and widths/duration of chromatographic ",
			      "peaks.")) 

## ----peak-detection-chrom-peaks-plot, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "Identified chromatographic peaks in the m/z by retention time space for one sample."----
plotChromPeaks(xdata, file = 3) 

## ----peak-detection-chrom-peak-image, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Frequency of identified chromatographic peaks along the retention time axis. The frequency is color coded with higher frequency being represented by yellow-white. Each line shows the peak frequency for one file."----
plotChromPeakImage(xdata) 

## ----peak-detection-highlight-chrom-peaks-plot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Signal for an example peak. Red and blue colors represent KO and wild type samples, respectively. The rectangles indicate the identified chromatographic peaks per sample."----
plot(chr_raw, col = group_colors[chr_raw$sample_group], lwd = 2)
highlightChromPeaks(xdata, border = group_colors[chr_raw$sample_group],
		    lty = 3, rt = rtr, mz = mzr) 

## ----peak-detection-chrom-peak-table-selected, message = FALSE, results = "asis"----
pander(chromPeaks(xdata, mz = mzr, rt = rtr),
       caption = paste("Identified chromatographic peaks in a selected ",
		       "m/z and retention time range.")) 

## ----peak-detection-chrom-peak-intensity-boxplot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Peak intensity distribution per sample."----
## Extract a list of per-sample peak intensities (in log2 scale)
ints <- split(log2(chromPeaks(xdata)[, "into"]),
	      f = chromPeaks(xdata)[, "sample"])
boxplot(ints, varwidth = TRUE, col = group_colors[xdata$sample_group],
	ylab = expression(log[2]~intensity), main = "Peak intensities")
grid(nx = NA, ny = NULL) 

## ----alignment-obiwarp, message = FALSE, results = "hide"------------------
xdata <- adjustRtime(xdata, param = ObiwarpParam(binSize = 0.6)) 

## ----alignment-rtime, message = FALSE--------------------------------------
## Extract adjusted retention times
head(adjustedRtime(xdata))

## Or simply use the rtime method
head(rtime(xdata)) 

## ----alignment-obiwarp-plot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Obiwarp aligned data. Base peak chromatogram after alignment (top) and difference between adjusted and raw retention times along the retention time axis (bottom)."----
## Get the base peak chromatograms.
bpis_adj <- chromatogram(xdata, aggregationFun = "max")
par(mfrow = c(2, 1), mar = c(4.5, 4.2, 1, 0.5))
plot(bpis_adj, col = group_colors[bpis_adj$sample_group])
## Plot also the difference of adjusted to raw retention time.
plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) 

## ----alignment-drop, message = FALSE---------------------------------------
## Does the object have adjusted retention times?
hasAdjustedRtime(xdata)

## Drop the alignment results.
xdata <- dropAdjustedRtime(xdata)

## Does the object have adjusted retention times?
hasAdjustedRtime(xdata) 

## ----alignment-peak-groups, message = FALSE--------------------------------
## Correspondence: group peaks across samples.
pdp <- PeakDensityParam(sampleGroups = xdata$sample_group,
			minFraction = 0.8)
xdata <- groupChromPeaks(xdata, param = pdp)

## Now the retention time correction.
pgp <- PeakGroupsParam(minFraction = 0.85)

## Get the peak groups that would be used for alignment.
xdata <- adjustRtime(xdata, param = pgp)
 

## ----alignment-peak-groups-plot, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 8, fig.cap = "Peak groups aligned data."----
## Plot the difference of adjusted to raw retention time.
plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group],
		  peakGroupsCol = "grey", peakGroupsPch = 1) 

## ----alignment-peak-groups-example-peak, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Example extracted ion chromatogram before (top) and after alignment (bottom)."----
par(mfrow = c(2, 1))
## Plot the raw data
plot(chr_raw, col = group_colors[chr_raw$sample_group])

## Extract the chromatogram from the adjusted object
chr_adj <- chromatogram(xdata, rt = rtr, mz = mzr)
plot(chr_adj, col = group_colors[chr_raw$sample_group]) 

## ----correspondence-example-slice, message = FALSE, fig.align = "center", fig.width = 10, fig.height = 10, fig.cap = "Example for peak density correspondence. Upper panel: chromatogram for an mz slice with multiple chromatographic peaks. Middle and lower panel: identified chromatographic peaks at their retention time (x-axis) and index within samples of the experiments (y-axis) for different values of the bw parameter. The black line represents the peak density estimate. Grouping of peaks (based on the provided settings) is indicated by grey rectangles."----
## Define the mz slice.
mzr <- c(305.05, 305.15)

## Extract and plot the chromatograms
chr_mzr <- chromatogram(xdata, mz = mzr, rt = c(2500, 4000))
par(mfrow = c(3, 1), mar = c(1, 4, 1, 0.5))
cols <- group_colors[chr_mzr$sample_group]
plot(chr_mzr, col = cols, xaxt = "n", xlab = "")
## Highlight the detected peaks in that region.
highlightChromPeaks(xdata, mz = mzr, col = cols, type = "point", pch = 16)
## Define the parameters for the peak density method
pdp <- PeakDensityParam(sampleGroups = xdata$sample_group,
			minFraction = 0.4, bw = 30)
par(mar = c(4, 4, 1, 0.5))
plotChromPeakDensity(xdata, mz = mzr, col = cols, param = pdp,
		     pch = 16, xlim = c(2500, 4000))
## Use a different bw
pdp <- PeakDensityParam(sampleGroups = xdata$sample_group,
			minFraction = 0.4, bw = 20)
plotChromPeakDensity(xdata, mz = mzr, col = cols, param = pdp,
		     pch = 16, xlim = c(2500, 4000)) 

## ----correspondence, message = FALSE---------------------------------------
## Perform the correspondence
pdp <- PeakDensityParam(sampleGroups = xdata$sample_group,
			minFraction = 0.4, bw = 20)
xdata <- groupChromPeaks(xdata, param = pdp) 

## ----correspondence-featureDefs, message = FALSE---------------------------
## Extract the feature definitions
featureDefinitions(xdata)
 

## ----correspondence-feature-values, message = FALSE------------------------
## Extract the into column for each feature.
head(featureValues(xdata, value = "into"))
 

## ----fill-chrom-peaks, message = FALSE-------------------------------------
## Filling missing peaks using default settings. Alternatively we could
## pass a FillChromPeaksParam object to the method.
xdata <- fillChromPeaks(xdata)

head(featureValues(xdata))
 

## ----fill-chrom-peaks-compare, message = FALSE-----------------------------
## Missing values before filling in peaks
apply(featureValues(xdata, filled = FALSE), MARGIN = 2,
      FUN = function(z) sum(is.na(z)))

## Missing values after filling in peaks
apply(featureValues(xdata), MARGIN = 2,
      FUN = function(z) sum(is.na(z)))
 

## ----final-pca, message = FALSE, fig.align = "center", fig.width = 8, fig.height = 8, fig.cap = "PCA for the faahKO data set, un-normalized intensities."----
## Extract the features and log2 transform them
ft_ints <- log2(featureValues(xdata, value = "into"))

## Perform the PCA omitting all features with an NA in any of the
## samples. Also, the intensities are mean centered.
pc <- prcomp(t(na.omit(ft_ints)), center = TRUE)

## Plot the PCA
cols <- group_colors[xdata$sample_group]
pcSummary <- summary(pc)
plot(pc$x[, 1], pc$x[,2], pch = 21, main = "", 
     xlab = paste0("PC1: ", format(pcSummary$importance[2, 1] * 100,
				   digits = 3), " % variance"),
     ylab = paste0("PC2: ", format(pcSummary$importance[2, 2] * 100,
				   digits = 3), " % variance"),
     col = "darkgrey", bg = cols, cex = 2)
grid()
text(pc$x[, 1], pc$x[,2], labels = xdata$sample_name, col = "darkgrey",
     pos = 3, cex = 2)
 

## ----processhistory, message = FALSE---------------------------------------
processHistory(xdata) 

## ----processhistory-select, message = FALSE--------------------------------
ph <- processHistory(xdata, type = "Retention time correction")

ph 

## ----processhistory-param, message = FALSE---------------------------------
## Access the parameter
processParam(ph[[1]])
 

## ----subset-filterFile, message = FALSE------------------------------------
subs <- filterFile(xdata, file = c(2, 4))

## Do we have identified chromatographic peaks?
hasChromPeaks(subs) 

## ----subset-filterFile-2, message = FALSE----------------------------------
## Do we still have features?
hasFeatures(subs)

## Do we still have adjusted retention times?
hasAdjustedRtime(subs) 

## ----subset-filterFile-3, message = FALSE----------------------------------
subs <- filterFile(xdata, keepAdjustedRtime = TRUE)

hasAdjustedRtime(subs) 

## ----subset-filterRt, message = FALSE--------------------------------------
subs <- filterRt(xdata, rt = c(3000, 3500))

range(rtime(subs)) 

## ----subset-filterRt-2, message = FALSE------------------------------------
hasAdjustedRtime(subs) 

## ----subset-filterRt-3, message = FALSE------------------------------------
hasChromPeaks(subs)

range(chromPeaks(subs)[, "rt"]) 

## ----subset-bracket, message = FALSE, warning = FALSE----------------------
## Extract all data from the 3rd file.
one_file <- filterFile(xdata, file = 3)

one_file_2 <- xdata[fromFile(xdata) == 3]

## Is the content the same?
all.equal(spectra(one_file), spectra(one_file_2)) 

## ----subset-bracket-peaks, message = FALSE---------------------------------
## Subsetting with filterFile preserves chromatographic peaks
head(chromPeaks(one_file))

## Subsetting with [ not
head(chromPeaks(one_file_2)) 

## ----subset-bracket-keepAdjustedRtime, message = FALSE, warnings = FALSE----
subs <- xdata[20:30, keepAdjustedRtime = TRUE]

hasAdjustedRtime(subs)

## Access adjusted retention times:
rtime(subs)

## Access raw retention times:
rtime(subs, adjusted = FALSE) 

## ----subset-double-bracket, message = FALSE--------------------------------
## Extract a single spectrum
xdata[[14]] 

## ----subset-split, message = FALSE-----------------------------------------
x_list <- split(xdata, f = fromFile(xdata), keepAdjustedRtime = TRUE)

lengths(x_list)

lapply(x_list, hasAdjustedRtime) 

## ----subset-split-by-file, message = FALSE---------------------------------
xdata_by_file <- splitByFile(xdata, f = factor(1:length(fileNames(xdata))))

lapply(xdata_by_file, hasChromPeaks) 

## ----multicore, message = FALSE, eval = FALSE------------------------------
#  register(bpstart(MulticoreParam(2)))

## ----snow, message = FALSE, eval = FALSE-----------------------------------
#  register(bpstart(SnowParam(2)))