## ----environment, cache=FALSE, echo=FALSE-------------------------------- suppressPackageStartupMessages(library("MSnbase")) suppressPackageStartupMessages(library("zoo")) if (.Platform$OS.type != "windows") suppressPackageStartupMessages(require("doMC")) suppressPackageStartupMessages(require("multicore")) suppressPackageStartupMessages(require(Rdisop)) library("grid") library("reshape2") ## ----knitr, include=FALSE, cache=FALSE----------------------------------- library(knitr) opts_chunk$set(fig.align = 'center', fig.show = 'hold', par = TRUE, prompt = TRUE, comment = NA) options(replace.assign = TRUE, width = 68) ## see ?evaluate::evaluate for details opts_knit$set(error=TRUE) ## knit_hooks$set(par = function(before, options, envir) { ## if (before && options$fig.show != 'none') ## par(mar = c(4,4,.1,.1), ## cex.lab = .95, ## cex.axis = .9, ## mgp = c(2,.7,0), ## tcl = -.3) ## }) ## ----readdata, echo=TRUE, cache=FALSE, tidy=FALSE------------------------ file <- dir(system.file(package = "MSnbase", dir = "extdata"), full.names = TRUE, pattern = "mzXML$") rawdata <- readMSData(file, msLevel = 2, verbose = FALSE) ## ----MSnExp, cache=FALSE, echo=TRUE-------------------------------------- library("MSnbase") itraqdata head(fData(itraqdata)) ## ----experimentsize, echo=FALSE, cache=FALSE----------------------------- sz <- sum(sapply(assayData(itraqdata), object.size)) + object.size(itraqdata) sz <- as.numeric(sz) sz <- round(sz/(1024^2), 2) ## ----Spectrum, cache=FALSE, echo=TRUE------------------------------------ sp <- itraqdata[["X1"]] sp ## ----accessors, cache=FALSE, echo=TRUE----------------------------------- peaksCount(sp) head(peaksCount(itraqdata)) rtime(sp) head(rtime(itraqdata)) ## ----ReporterIons-------------------------------------------------------- iTRAQ4 ## ----spectrumPlot, dev='pdf', fig.width=6, fig.height=4, fig.keep='high'---- plot(sp, reporters = iTRAQ4, full = TRUE) ## ----bsaSelect, eval=TRUE, echo=FALSE------------------------------------ ## bsasel <- fData(itraqdata)$ProteinAccession == "BSA" bsasel <- 1:3 ## ----subset, echo=TRUE--------------------------------------------------- sel <- fData(itraqdata)$ProteinAccession == "BSA" bsa <- itraqdata[sel] bsa as.character(fData(bsa)$ProteinAccession) ## ----msnexpPlot, dev='pdf', fig.width=3.7, fig.height=4.5, fig.keep='last'---- plot(bsa, reporters = iTRAQ4, full = FALSE) + theme_gray(8) ## ----msnexpIdentification, echo=TRUE, cache=FALSE, tidy=FALSE------------ ## find path to a mzXML file quantFile <- dir(system.file(package = "MSnbase", dir = "extdata"), full.name = TRUE, pattern = "mzXML$") ## find path to a mzIdentML file identFile <- dir(system.file(package = "MSnbase", dir = "extdata"), full.name = TRUE, pattern = "mzid$") ## create basic MSnExp msexp <- readMSData(quantFile, verbose = FALSE) head(fData(msexp), n = 2) ## ----msnexpIdentification2, echo=TRUE, cache=FALSE, tidy=FALSE----------- ## add identification information msexp <- addIdentificationData(msexp, filenames = identFile, verbose = FALSE) head(fData(msexp), n = 2) ## ----msnexpIdentification3, echo=TRUE, cache=FALSE, tidy=FALSE----------- idSummary(msexp) ## ----msnexpIdentification4, echo=TRUE, cache=FALSE, tidy=FALSE----------- fData(msexp)$pepseq msexp <- removeNoId(msexp) fData(msexp)$pepseq idSummary(msexp) ## ----removePeaks, echo=TRUE, cache=FALSE--------------------------------- experiment <- removePeaks(itraqdata, t = 400, verbose = FALSE) ## total ion current ionCount(itraqdata[["X55"]]) ionCount(experiment[["X55"]]) ## ----spectrum-clean-plot, dev='pdf', fig.width=6, fig.height=3, echo=FALSE, fig.keep='high'---- p1 <- plot(itraqdata[["X55"]], full = TRUE, plot = FALSE) + theme_gray(5) p2 <- plot(experiment[["X55"]], full = TRUE, plot = FALSE) + theme_gray(5) grid.newpage() pushViewport(viewport(layout = grid.layout(1, 2))) vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y) print(p1,vp=vplayout(1,1)) print(p2,vp=vplayout(1,2)) ## ----clean, echo=TRUE, cache=FALSE--------------------------------------- ## number of peaks peaksCount(itraqdata[["X55"]]) peaksCount(experiment[["X55"]]) experiment <- clean(experiment, verbose = FALSE) peaksCount(experiment[["X55"]]) ## ----preprosp, cache=FALSE, echo=FALSE----------------------------------- int <- c(0,1,1,3,1,1,0,0,0,1,3,7,3,1,0) mz <- c(113.9,114.0,114.05,114.1,114.15,114.2,114.25, 114.3,114.35,114.4,114.42,114.48,114.5,114.55,114.6) ppsp <- new("Spectrum2",intensity=int,mz=mz,centroided=FALSE) p1 <- plot(ppsp, full = TRUE, plot = FALSE) + theme_gray(5) + geom_point(size=3,alpha=I(1/3)) + geom_hline(yintercept=3,linetype=2) + ggtitle("Original spectrum") p2 <- plot(removePeaks(ppsp,t=3), full=TRUE, plot = FALSE) + theme_gray(5) + geom_point(size=3,alpha=I(1/3)) + geom_hline(yintercept=3,linetype=2) + ggtitle("Peaks < 3 removed") p3 <- plot(clean(removePeaks(ppsp,t=3)), full = TRUE, plot = FALSE) + theme_gray(5) + geom_point(size=3,alpha=I(1/3)) + geom_hline(yintercept=3,linetype=2) + ggtitle("Peaks < 3 removed and cleaned") ## ----preprocPlot, dev='pdf', fig.width=3, fig.height=6, echo=FALSE------- grid.newpage() pushViewport(viewport(layout = grid.layout(3, 1))) print(p1, vp=vplayout(1,1)) print(p2, vp=vplayout(2,1)) print(p3, vp=vplayout(3,1)) ## ----trimMz, echo=TRUE, cache=FALSE-------------------------------------- range(mz(itraqdata[["X55"]])) experiment <- trimMz(experiment, mzlim = c(112,120)) range(mz(experiment[["X55"]])) experiment ## ----reporters, echo=TRUE, cache=FALSE----------------------------------- mz(iTRAQ4) width(iTRAQ4) ## ----simplesp, cache=FALSE, echo=FALSE, fig.keep='none'------------------ int <- c(0,1,1,3,1,1,0) mz <- c(113.9,114.0,114.05,114.1,114.15,114.2,114.25) ssp <- new("Spectrum2",intensity=int,mz=mz,centroided=FALSE) p <- plot(ssp, full=TRUE, plot=FALSE) p <- p + theme_gray(5) ## ----quantitationPlot, dev='pdf',fig.width=5, fig.height=5, echo=FALSE---- grid.newpage() pushViewport(viewport(layout = grid.layout(2, 2))) vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y) print(p + ggtitle("Quantitation using 'sum'") + geom_point(size = 3, alpha = I(1/3), colour = "red"), vp = vplayout(1, 1)) print(p + ggtitle("Quantitation using 'max'") + geom_point(aes(x = 114.1, y = 3), alpha = I(1/18), colour = "red", size = 3), vp = vplayout(1, 2)) print(p + ggtitle("Trapezoidation and strict=FALSE") + geom_polygon(alpha = I(1/5), fill = "red"), vp = vplayout(2, 1)) print(p + ggtitle("Trapezoidation and strict=TRUE") + geom_polygon(aes(x = c(NA, 114.05, 114.05, 114.1, 114.15, 114.15, NA), y=c(NA, 0, 1, 3, 1, 0, NA)), fill = "red", alpha = I(1/5)), vp = vplayout(2,2)) ## ----quantify, echo=TRUE, cache=FALSE, tidy=FALSE------------------------ qnt <- quantify(experiment, method = "trap", reporters = iTRAQ4, strict = FALSE, parallel = FALSE, verbose = FALSE) qnt head(exprs(qnt)) ## ----filterNA, echo=TRUE------------------------------------------------- table(is.na(qnt)) qnt <- filterNA(qnt, pNA = 0) sum(is.na(qnt)) ## ----removeNa, echo=TRUE, eval=FALSE------------------------------------- ## whichRow <- which(is.na((qnt))) %% nrow(qnt) ## qnt <- qnt[-whichRow, ] ## ----pheplus1, echo=TRUE, cache=FALSE------------------------------------ library(Rdisop) ## Phenylalanine immonium ion Fim <- getMolecule("C8H10N") getMass(Fim) isotopes <- getIsotope(Fim) F1 <- isotopes[2, 2] F1 ## ----purityCorrect, echo=TRUE, cache=FALSE, tidy = FALSE----------------- impurities <- matrix(c(0.929, 0.059, 0.002, 0.000, 0.020, 0.923, 0.056, 0.001, 0.000, 0.030, 0.924, 0.045, 0.000, 0.001, 0.040, 0.923), nrow = 4) qnt.crct <- purityCorrect(qnt, impurities) head(exprs(qnt)) head(exprs(qnt.crct)) ## ----normalise, echo=TRUE, cache=FALSE----------------------------------- qnt.max <- normalise(qnt, "max") qnt.sum <- normalise(qnt, "sum") qnt.quant <- normalise(qnt, "quantiles") qnt.qrob <- normalise(qnt, "quantiles.robust") qnt.vsn <- normalise(qnt, "vsn") ## ----normPlot, dev='pdf', fig.width=5, fig.height=7, echo=FALSE---------- .plot <- function(x,ttl=NULL) { boxplot(exprs(x), main=ifelse(is.null(ttl),processingData(x)@processing[2],ttl), cex.main=.8, cex.lab=.5, cex.axis=.5, cex=.8) grid() } oldmar <- par()$mar par(mfrow=c(3,2),mar=c(2.9,2.9,2.9,1)) .plot(qnt, ttl = "Non-normalised data") .plot(qnt.max, ttl = "Maximum") .plot(qnt.sum, ttl = "Sum") .plot(qnt.quant, ttl = "Quantile") .plot(qnt.qrob, ttl = "Robust quantile") .plot(qnt.vsn, ttl = "vsn") ## ----cvdata, echo=FALSE, cache=FALSE------------------------------------- sd1 <- apply(log2(exprs(qnt))+10,1,sd) mn1 <- apply(log2(exprs(qnt))+10,1,mean) cv1 <- sd1/mn1 sd2 <- apply(exprs(qnt.vsn)+10,1,sd) mn2 <- apply(exprs(qnt.vsn)+10,1,mean) cv2 <- sd2/mn2 dfr <- rbind(data.frame(rank=order(mn1),cv=cv1,norm="raw"), data.frame(rank=order(mn2),cv=cv2,norm="vsn")) library("zoo") ## rmed1 <- rollapply(cv1,7,function(x) median(x,na.rm=TRUE)) ## rmed2 <- rollapply(cv2,7,function(x) median(x,na.rm=TRUE)) ## ## Calling directly rollapply.zoo to make it zoo_1.6-4 compatible. ## The above requires zoo >= 1.7-0, which is as of 15 March 2011 ## not yet available on CRAN (only on r-forge). rmed1 <- zoo:::rollapply.zoo(cv1,7,function(x) median(x,na.rm=TRUE)) rmed2 <- zoo:::rollapply.zoo(cv2,7,function(x) median(x,na.rm=TRUE)) dfr2 <- rbind(data.frame(x=seq(1,70,by=(70/length(rmed1))), y=rmed1,norm="raw"), data.frame(x=seq(1,70,by=(70/length(rmed1))), y=rmed2,norm="vsn")) p <- qplot(rank,cv,data=dfr,col=norm) + geom_line(data=dfr2,aes(x=x,y=y,colour=norm)) + theme_gray(7) ## ----cvPlot, dev='pdf', fig.width=5, fig.height=4, echo=FALSE------------ print(p) ## ----prepareMsnsetNormPlot, cache=FALSE, echo=FALSE, keep.fig='none'----- p <- plot(normalise(experiment[bsasel], "max"), reporters = iTRAQ4, full = FALSE, plot = FALSE) p <- p + theme_gray(7) ## ----msnexpNormPlot, dev='pdf', fig.width=5, fig.height=6, echo=FALSE---- print(p) ## ----makeGroups1,echo=FALSE,cache=FALSE---------------------------------- gb <- fData(qnt)$ProteinAccession ## ----makeGroups2,echo=TRUE,cache=FALSE----------------------------------- gb <- fData(qnt)$ProteinAccession table(gb) length(unique(gb)) ## ----combineFeatures, echo=TRUE, cache=FALSE----------------------------- qnt2 <- combineFeatures(qnt, groupBy = gb, fun = "median") qnt2 ## ----count--------------------------------------------------------------- sc <- quantify(msexp, method = "count") ## lets modify out data for demonstration purposes fData(sc)$accession[1] <- fData(sc)$accession[2] fData(sc)$accession sc <- combineFeatures(sc, groupBy = fData(sc)$accession, fun = "sum") exprs(sc) ## ----labelfree----------------------------------------------------------- fData(msexp)[, c("accession", "npsm")] ## ----SIn----------------------------------------------------------------- siquant <- quantify(msexp, method = "SIn") processingData(siquant) exprs(siquant) ## ----incompdiss, echo=TRUE, cache=FALSE, tidy=FALSE---------------------- iTRAQ5 incompdiss <- quantify(itraqdata, method = "trap", reporters = iTRAQ5, strict = FALSE, parallel = FALSE, verbose = FALSE) head(exprs(incompdiss)) ## ----incompdissPlot, dev='pdf', fig.width=5, fig.height=2.5, echo=FALSE, warning=FALSE---- dfr <- melt(exprs(incompdiss)) colnames(dfr) <- c("feature", "reporters", "intensity") dfr$reporters <- sub("iTRAQ5.", "", dfr$reporters) repsum <- apply(exprs(incompdiss), 1, function(x) sum(x[1:4])) dfr2 <- data.frame(iTRAQ1to4 = repsum, iTRAQ5 = exprs(incompdiss)[,5]) p1 <- ggplot(data = dfr, aes(x = reporters,y = log10(intensity))) + geom_boxplot() + theme_gray(6) p2 <- ggplot(data = dfr2, aes(x = log10(iTRAQ1to4), y = log10(iTRAQ5))) + geom_point(alpha = I(1/2)) + geom_abline(intercept = 0, slope = 1, linetype = "dotted") + stat_smooth(method = lm, se = FALSE) + xlab(label = expression(log[10]~sum~114~to~117)) + ylab(label = expression(log[10]~145)) + theme_gray(6) grid.newpage() pushViewport(viewport(layout = grid.layout(1, 2))) print(p1, vp = vplayout(1, 1)) print(p2, vp = vplayout(1, 2)) ## ----makeexp12, echo=TRUE, cache=FALSE, tidy = FALSE--------------------- exp1 <- quantify(itraqdata, reporters = iTRAQ4, parallel = FALSE, verbose = FALSE) sampleNames(exp1) exp2 <- quantify(rawdata, reporters = iTRAQ4, parallel = FALSE, verbose = FALSE) sampleNames(exp2) ## ----updateFnames, echo=TRUE, cache=FALSE-------------------------------- head(featureNames(exp1)) exp1 <- updateFeatureNames(exp1) head(featureNames(exp1)) head(featureNames(exp2)) exp2 <- updateFeatureNames(exp2) head(featureNames(exp2)) ## ----comb1, echo=TRUE, cache=FALSE--------------------------------------- exp12 <- combine(exp1, exp2) dim(exp1) dim(exp2) dim(exp12) ## ----make2exps2, echo=TRUE, cache=FALSE, tidy=FALSE---------------------- set.seed(1) i <- sample(length(itraqdata), 35) j <- sample(length(itraqdata), 35) exp1 <- quantify(itraqdata[i], reporters = iTRAQ4, parallel = FALSE, verbose = FALSE) exp2 <- quantify(itraqdata[j], reporters = iTRAQ4, parallel = FALSE, verbose = FALSE) exp1 <- droplevels(exp1) exp2 <- droplevels(exp2) table(featureNames(exp1) %in% featureNames(exp2)) exp1 <- combineFeatures(exp1, groupBy = fData(exp1)$ProteinAccession) exp2 <- combineFeatures(exp2, groupBy = fData(exp2)$ProteinAccession) head(featureNames(exp1)) head(featureNames(exp2)) ## ----renameSamples, echo=TRUE, cache=FALSE------------------------------- sampleNames(exp1) exp1 <- updateSampleNames(exp1) sampleNames(exp1) sampleNames(exp1) <- c("Ctrl1", "Cond1", "Ctrl2", "Cond2") sampleNames(exp2) <- c("Ctrl3", "Cond3", "Ctrl4", "Cond4") ## ----fdatanames, echo=TRUE, cache=FALSE---------------------------------- stopifnot(all(fvarLabels(exp1) == fvarLabels(exp2))) fData(exp1)["BSA", 1:4] fData(exp2)["BSA", 1:4] ## ----renameFvars, echo=TRUE, cache=FALSE--------------------------------- exp1 <- updateFvarLabels(exp1) exp2 <- updateFvarLabels(exp2) head(fvarLabels(exp1)) head(fvarLabels(exp2)) ## ----combine, echo=TRUE, cache=FALSE------------------------------------- exp12 <- combine(exp1, exp2) dim(exp12) pData(exp12) exprs(exp12)[25:28, ] exp12 ## ----sessioninfo, results='asis', echo=FALSE, cache=FALSE---------------- toLatex(sessionInfo())