## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ---- eval=FALSE-------------------------------------------------------------- # if(!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("randRotation") ## ----message=FALSE------------------------------------------------------------ library(randRotation) set.seed(0) # Dataframe of phenotype data (sample information) pdata <- data.frame(batch = as.factor(rep(1:3, c(10,10,10))), phenotype = rep(c("Control", "Cancer"), c(5,5))) features <- 1000 # Matrix with random gene expression data edata <- matrix(rnorm(features * nrow(pdata)), features) rownames(edata) <- paste("feature", 1:nrow(edata)) xtabs(data = pdata) ## ----------------------------------------------------------------------------- mod1 <- model.matrix(~1+phenotype, pdata) head(mod1) ## ----------------------------------------------------------------------------- rr <- initBatchRandrot(Y = edata, X = mod1, coef.h = 2, batch = pdata$batch) ## ----------------------------------------------------------------------------- statistic <- function(Y, batch, mod){ # (I) Batch effect correction with "Combat" from the "sva" package Y <- sva::ComBat(dat = Y, batch = batch, mod = mod) # (II) Linear model fit fit1 <- lm.fit(x = mod, y = t(Y)) abs(t(coef(fit1))[,2]) } ## ----message=FALSE, results='hide'-------------------------------------------- rs1 <- rotateStat(initialised.obj = rr, R = 10, statistic = statistic, batch = pdata$batch, mod = mod1) ## ----------------------------------------------------------------------------- rs1 ## ----------------------------------------------------------------------------- p.vals <- pFdr(rs1) hist(p.vals, col = "lightgreen");abline(h = 100, col = "blue", lty = 2) qqunif(p.vals) ## ----------------------------------------------------------------------------- edata.combat <- sva::ComBat(dat = edata, batch = pdata$batch, mod = mod1) fit1 <- lm.fit(x = mod1, y = t(edata.combat)) # t-statistics var.beta <- diag(solve(t(mod1)%*%mod1)) s2 <- colSums(resid(fit1)^2) / df.residual(fit1) t1 <- t(coef(fit1) / sqrt(var.beta)) / sqrt(s2) # P-values from t-statistics p.vals.nonrot <- 2*pt(abs(t1), df.residual(fit1), lower.tail = FALSE) p.vals.nonrot <- p.vals.nonrot[,2] hist(p.vals.nonrot, col = "lightgreen");abline(h = 100, col = "blue", lty = 2) qqunif(p.vals.nonrot) plot(p.vals, p.vals.nonrot, log = "xy", pch = 20) abline(0,1, col = 4, lwd = 2) ## ----------------------------------------------------------------------------- # Specification of the full model mod1 <- model.matrix(~1+phenotype, pdata) # We select "phenotype" as the coefficient associated with H0 # All other coefficients are considered as "determined" coefficients rr <- initRandrot(Y = edata, X = mod1, coef.h = 2) coefs <- function(Y, mod){ t(coef(lm.fit(x = mod, y = t(Y)))) } # Specification of the H0 model mod0 <- model.matrix(~1, pdata) coef01 <- coefs(edata, mod0) coef02 <- coefs(randrot(rr), mod0) head(cbind(coef01, coef02)) all.equal(coef01, coef02) ## ----------------------------------------------------------------------------- coef11 <- coefs(edata, mod1) coef12 <- coefs(randrot(rr), mod1) head(cbind(coef11, coef12)) ## ----message=FALSE, results='hold'-------------------------------------------- library(limma) mod1 = model.matrix(~phenotype, pdata) ## ----------------------------------------------------------------------------- # Linear model fit fit0 <- lmFit(edata, mod1) fit0 <- eBayes(fit0) # P values for phenotype coefficient p0 <- topTable(fit0, coef = 2, number = Inf, adjust.method = "none", sort.by = "none")$P.Value hist(p0, freq = FALSE, col = "lightgreen", breaks = seq(0,1,0.1)) abline(1,0, col = "blue", lty = 2) qqunif(p0) ## ----------------------------------------------------------------------------- library(sva) edata.combat = ComBat(edata, batch = pdata$batch, mod = mod1) ## ----------------------------------------------------------------------------- # Linear model fit fit1 <- lmFit(edata.combat, mod1) fit1 <- eBayes(fit1) # P value for phenotype coefficient p.combat <- topTable(fit1, coef = 2, number = Inf, adjust.method = "none", sort.by = "none")$P.Value hist(p.combat, freq = FALSE, col = "lightgreen", breaks = seq(0,1,0.1)) abline(1,0, col = "blue", lty = 2) qqunif(p.combat) ## ----------------------------------------------------------------------------- init1 <- initBatchRandrot(edata, mod1, coef.h = 2, batch = pdata$batch) ## ----------------------------------------------------------------------------- statistic <- function(Y, batch, mod, coef){ Y.tmp <- sva::ComBat(dat = Y, batch = batch, mod = mod) fit1 <- limma::lmFit(Y.tmp, mod) fit1 <- limma::eBayes(fit1) # The "abs" is needed for "pFdr" to calculate 2-tailed statistics abs(fit1$t[,coef]) } ## ----message=FALSE, results='hide'-------------------------------------------- res1 <- rotateStat(initialised.obj = init1, R = 10, statistic = statistic, batch = pdata$batch, mod = mod1, coef = 2) ## ----------------------------------------------------------------------------- p.rot <- pFdr(res1) head(p.rot) hist(p.rot, freq = FALSE, col = "lightgreen", breaks = seq(0,1,0.1)) abline(1,0, col = "blue", lty = 2) qqunif(p.rot) ## ----------------------------------------------------------------------------- plot(density(log(p.rot/p0)), col = "salmon", "Log p ratios", panel.first = abline(v=0, col = "grey"), xlim = range(log(c(p.rot/p0, p.combat/p0)))) lines(density(log(p.combat/p0)), col = "blue") legend("topleft", legend = c("log(p.combat/p0)", "log(p.rot/p0)"), lty = 1, col = c("blue", "salmon")) ## ----------------------------------------------------------------------------- plot(p0, p.combat, log = "xy", pch = 20, col = "lightblue", ylab = "") points(p0, p.rot, pch = 20, col = "salmon") abline(0,1, lwd = 1.5, col = "black") legend("topleft", legend = c("p.combat", "p.rot"), pch = 20, col = c("lightblue", "salmon")) ## ----------------------------------------------------------------------------- plot(density(log(p.combat/p.rot)), col = "blue", main = "log(p.combat / p.rot )", panel.first = abline(v=0, col = "grey")) ## ----------------------------------------------------------------------------- fdr.q <- pFdr(res1, "fdr.q") fdr.qu <- pFdr(res1, "fdr.q") fdr.BH <- pFdr(res1, "BH") FDRs <- cbind(fdr.q, fdr.qu, fdr.BH) ord1 <- order(res1$s0, decreasing = TRUE) FDRs.sorted <- FDRs[ord1,] matplot(FDRs.sorted, type = "l", lwd = 2) legend("bottomright", legend = c("fdr.q", "fdr.qu", "BH"), lty = 1:5, lwd = 2, col = 1:6) head(FDRs.sorted) ## ----------------------------------------------------------------------------- mapping <- function(Y) abs(Y) df_estimate(edata, mapping = mapping) ncol(edata) ## ----------------------------------------------------------------------------- mapping <- function(Y) floor(Y) df_estimate(edata, mapping = mapping) ## ----------------------------------------------------------------------------- mapping <- function(Y, batch, mod) { limma::removeBatchEffect(Y, batch = batch, design = mod) } df_estimate(edata, mapping = mapping, batch = pdata$batch, mod = mod1) ## ----------------------------------------------------------------------------- sessionInfo()