## ----setup,include=FALSE------------------------------------------------------ # Install package if necessary (to be used before running the vignette) # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("limpca") library(ggplot2) library(limpca) if (!requireNamespace("gridExtra", quietly = TRUE)) { stop("Install 'pander' to knit this vignette") } library(gridExtra) if (!requireNamespace("pander", quietly = TRUE)) { stop("Install 'pander' to knit this vignette") } library(pander) if (!requireNamespace("car", quietly = TRUE)) { stop("Install 'pander' to knit this vignette") } library(car) knitr::opts_chunk$set( message = FALSE, warning = TRUE, comment = NA, crop = NULL, width = 60, dpi = 50, fig.width = 5, fig.height = 3.5, dev = "png" ) ## ----eval=FALSE--------------------------------------------------------------- # library(car) # library(pander) # library(gridExtra) # library(ggplot2) ## ----Install, eval=FALSE------------------------------------------------------ # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("limpca") ## ----Load, eval=FALSE--------------------------------------------------------- # library("limpca") ## ----Data Importation--------------------------------------------------------- data("trout") # print number of and response names cat("\n Nb of Responses : ", ncol(trout$outcomes), "\n ") cat("\nResponses :\n", colnames(trout$outcomes), "\n ") # Order responses by alphabetic order trout$outcomes <- trout$outcomes[, order(dimnames(trout$outcomes)[[2]])] cat("\n Ordered responses :\n ", colnames(trout$outcomes), "\n ") # print factor names cat("\nDesign factors : ", colnames(trout$design), "\n ") # plot the design with plotDesign function limpca::plotDesign( design = trout$design, x = "Treatment", y = "Day", cols = "Exposure", title = "Initial design of the trout dataset" ) ## ----Exploratory PCA---------------------------------------------------------- resPCA <- limpca::pcaBySvd(trout$outcomes) limpca::pcaScreePlot(resPCA, nPC = 8) limpca::pcaScorePlot( resPcaBySvd = resPCA, axes = c(1, 2), title = "Score plot of original data ", design = trout$design, color = "Aquarium", points_labs_rn = TRUE ) ## ----Transformation----------------------------------------------------------- # Log Transformation trout_log <- trout trout_log$outcomes <- as.matrix(log10(trout$outcomes)) # new PCA resPCA1 <- limpca::pcaBySvd(trout_log$outcomes) limpca::pcaScreePlot(resPCA1, nPC = 8) limpca::pcaScorePlot( resPcaBySvd = resPCA1, axes = c(1, 2), title = "Score plot of Log10 data ", design = trout_log$design, color = "Aquarium", drawShapes = "polygon", points_labs_rn = TRUE ) ## ----Remove Outliers---------------------------------------------------------- # Remove outliers and create new dataset trout_clean <- trout_log outliers <- match( c("D72EPA2.1", "D28EPA2.2"), rownames(trout_log$outcomes) ) trout_clean$outcomes <- trout_log$outcomes[-outliers, ] trout_clean$design <- trout_log$design[-outliers, ] ## ----new PCA------------------------------------------------------------------ # PCA resPCA2 <- limpca::pcaBySvd(trout_clean$outcomes) limpca::pcaScreePlot(resPCA2, nPC = 8) # Score plot Components 1 and 2 limpca::pcaScorePlot( resPcaBySvd = resPCA2, axes = c(1, 2), title = "Score plot of Log10 data without outliers (PC 1&2)", design = trout_clean$design, color = "Aquarium", drawShapes = "polygon", points_labs_rn = FALSE ) # Score plot Components 3 and 4 limpca::pcaScorePlot( resPcaBySvd = resPCA2, axes = c(3, 4), title = "Score plot of Log10 data without outliers (PC 3&4)", design = trout_clean$design, color = "Aquarium", drawShapes = "polygon", points_labs_rn = FALSE ) ## ----Mean aggregation--------------------------------------------------------- # Mean aggregation mean_outcomes <- matrix(0, nrow = 24, ncol = 15) mean_design <- matrix(0, nrow = 24, ncol = 3) y <- list( trout_clean$design[["Day"]], trout_clean$design[["Treatment"]], trout_clean$design[["Exposure"]] ) for (i in 1:15) { mean_outcomes[, i] <- aggregate(trout_clean$outcomes[, i], by = y, mean)[, 4] } mean_design <- aggregate(trout_clean$outcomes[, 1], by = y, mean)[, c(1:3)] # Set row and col names colnames(mean_outcomes) <- colnames(trout_clean$outcomes) colnames(mean_design) <- colnames(trout_clean$design)[1:3] trout_mean_names <- apply(mean_design, 1, paste, collapse = "") rownames(mean_outcomes) <- trout_mean_names rownames(mean_design) <- trout_mean_names ## ----Centering and Scaling---------------------------------------------------- # Outcomes centering and Scaling mean_outcomes <- scale(mean_outcomes, center = TRUE, scale = TRUE) # New data object creation trout_mean <- list( "outcomes" = mean_outcomes, "design" = mean_design, "formula" = trout$formula ) # Clean objects rm( resPCA, resPCA1, resPCA2, y, mean_design, mean_outcomes, trout_mean_names ) ## ----Design------------------------------------------------------------------- pander(head(trout_mean$design)) limpca::plotDesign( design = trout_mean$design, title = "Design of mean aggregated trout dataset" ) ## ----Lineplot----------------------------------------------------------------- limpca::plotLine(trout_mean$outcomes, rows = c(1, 24), xaxis_type = "character", type = "s" ) + ggplot2::theme(axis.text.x = element_text(angle = 45, hjust = 1)) ## ----PCA---------------------------------------------------------------------- resPCA_mean <- limpca::pcaBySvd(trout_mean$outcomes) pcaScreePlot(resPCA_mean, nPC = 6) ## ----------------------------------------------------------------------------- limpca::pcaScorePlot( resPcaBySvd = resPCA_mean, axes = c(1, 2), title = "PCA score plot by Exposure and Day", design = trout_mean$design, shape = "Exposure", color = "Day", points_labs_rn = FALSE ) limpca::pcaScorePlot( resPcaBySvd = resPCA_mean, axes = c(1, 2), title = "PCA scores plot by Treatment and Day", design = trout_mean$design, shape = "Day", color = "Treatment", points_labs_rn = FALSE ) limpca::pcaScorePlot( resPcaBySvd = resPCA_mean, axes = c(1, 2), title = "PCA scores plot by Exposure and Treatment", design = trout_mean$design, shape = "Treatment", color = "Exposure", points_labs_rn = FALSE ) ## ----------------------------------------------------------------------------- limpca::pcaLoading1dPlot( resPcaBySvd = resPCA_mean, axes = c(1, 2), title = "PCA loadings plot trout", xlab = " ", ylab = "Expression", xaxis_type = "character", type = "s" ) + ggplot2::theme(axis.text.x = element_text(angle = 45, hjust = 1)) ## ----------------------------------------------------------------------------- limpca::pcaLoading2dPlot( resPcaBySvd = resPCA_mean, axes = c(1, 2), title = "PCA loadings plot trout", addRownames = TRUE ) ## ----Scatterplot Matrix,fig.width=10,fig.height=10---------------------------- limpca::plotScatterM( Y = trout_mean$outcomes, cols = c(1:15), design = trout_mean$design, varname.colorup = "Day", vec.colorup = c("CadetBlue4", "pink", "orange"), varname.colordown = "Day", vec.colordown = c("CadetBlue4", "pink", "orange"), varname.pchup = "Treatment", varname.pchdown = "Exposure" ) ## ----ModelMatrix-------------------------------------------------------------- resLmpModelMatrix <- limpca::lmpModelMatrix(trout_mean) pander::pander(head(resLmpModelMatrix$modelMatrix)) ## ----EffectMatrices----------------------------------------------------------- resLmpEffectMatrices <- lmpEffectMatrices(resLmpModelMatrix) resLmpEffectMatrices$varPercentagesPlot ## ----Bootstrap---------------------------------------------------------------- resLmpBootstrapTests <- lmpBootstrapTests( resLmpEffectMatrices = resLmpEffectMatrices, nboot = 1000 ) # Print p-values pander::pander(t(resLmpBootstrapTests$resultsTable)) ## ----ASCA PCA----------------------------------------------------------------- resASCA <- lmpPcaEffects( resLmpEffectMatrices = resLmpEffectMatrices, method = "ASCA", combineEffects = list(c( "Day", "Treatment", "Day:Treatment" )) ) ## ----ASCA Contrib------------------------------------------------------------- resLmpContributions <- lmpContributions(resASCA) ## ----ASCA ContribP------------------------------------------------------------ pander::pander(resLmpContributions$totalContribTable) pander::pander(resLmpContributions$effectTable) pander::pander(resLmpContributions$contribTable) pander::pander(resLmpContributions$combinedEffectTable) ## Visualize the more important contributions resLmpContributions$plotContrib ## ----ASCA Day----------------------------------------------------------------- A <- lmpScorePlot(resASCA, effectNames = "Day", color = "Day", shape = "Day" ) B <- lmpLoading2dPlot(resASCA, effectNames = "Day", points_labs = colnames(trout$outcomes) ) grid.arrange(A, B, ncol = 2) ## ----ASCA Treatment----------------------------------------------------------- A <- lmpScorePlot(resASCA, effectNames = "Treatment", color = "Treatment", shape = "Treatment" ) B <- lmpLoading2dPlot(resASCA, effectNames = "Treatment", points_labs = colnames(trout$outcomes) ) grid.arrange(A, B, ncol = 2) ## ----ASCA DayTreatment-------------------------------------------------------- A <- lmpScorePlot(resASCA, effectNames = "Day:Treatment", color = "Treatment", shape = "Day" ) B <- lmpLoading2dPlot(resASCA, effectNames = "Day:Treatment", points_labs = colnames(trout$outcomes) ) grid.arrange(A, B, ncol = 2) ## ----ASCA Combined effect----------------------------------------------------- A <- lmpScorePlot(resASCA, effectNames = "Day+Treatment+Day:Treatment", color = "Treatment", shape = "Day" ) B <- lmpLoading2dPlot(resASCA, effectNames = "Day+Treatment+Day:Treatment", points_labs = colnames(trout$outcomes) ) grid.arrange(A, B, ncol = 2) ## ----ASCA Residuals----------------------------------------------------------- A <- lmpScorePlot(resASCA, effectNames = "Residuals", color = "Treatment", shape = "Day" ) B <- lmpLoading2dPlot(resASCA, effectNames = "Residuals", points_labs = colnames(trout$outcomes) ) grid.arrange(A, B, ncol = 2) ## ----ASCA Effects------------------------------------------------------------- A <- lmpEffectPlot(resASCA, effectName = "Day:Treatment", x = "Day", z = "Treatment", axes = c(1, 2) ) A$PC1 <- A$PC1 + ggtitle("PC1: Day:Treatment effect alone") A$PC2 <- A$PC2 + ggtitle("PC2: Day:Treatment effect alone") grid.arrange(A$PC1, A$PC2, ncol = 2) A <- lmpEffectPlot(resASCA, effectName = "Day+Treatment+Day:Treatment", x = "Day", z = "Treatment", axes = c(1, 2) ) A$PC1 <- A$PC1 + ggtitle("PC1: Combined D+T+D:T effects") A$PC2 <- A$PC2 + ggtitle("PC2: Combined D+T+D:T effects") grid.arrange(A$PC1, A$PC2, ncol = 2) ## ----APCA PCA----------------------------------------------------------------- resAPCA <- lmpPcaEffects( resLmpEffectMatrices = resLmpEffectMatrices, method = "APCA" ) ## ----APCA Scores-------------------------------------------------------------- # Day Effect lmpScorePlot(resAPCA, effectNames = "Day", color = "Day", shape = "Day", drawShapes = "ellipse" ) # Treatment Effect lmpScorePlot(resAPCA, effectNames = "Treatment", color = "Treatment", shape = "Treatment", drawShapes = "ellipse" ) # Exposure Effect lmpScorePlot(resAPCA, effectNames = "Exposure", color = "Exposure", shape = "Exposure", drawShapes = "ellipse" ) # Day:Treatment Effect lmpScorePlot(resAPCA, effectNames = "Day:Treatment", color = "Treatment", shape = "Day", drawShapes = "polygon" ) ## ----Anova-------------------------------------------------------------------- # Creation of a matrix to store the p-values m <- ncol(trout_mean$outcomes) mat_pval <- matrix(nrow = m, ncol = 6) dimnames(mat_pval) <- list( dimnames(trout_mean$outcomes)[[2]], c( "Day", "Treatment", "Exposure", "Day:Treatment", "Day:Exposure", "Treatment:Exposure" ) ) # Parallel ANOVA modeling for (i in 1:m) { data <- cbind(y = trout_mean$outcomes[, i], trout_mean$design) Modl <- lm(y ~ Day + Treatment + Exposure + Day:Treatment + Day:Exposure + Treatment:Exposure, contrasts = list(Day = contr.sum, Treatment = contr.sum, Exposure = contr.sum), data = data ) tabanova <- Anova(Modl, type = 3) mat_pval[i, ] <- tabanova[2:7, 4] } # FDR p-values correction for (i in 1:6) mat_pval[, i] <- p.adjust(mat_pval[, i], method = "BH") ## ----------------------------------------------------------------------------- pander(mat_pval) heatmap(-log10(mat_pval), Rowv = NA, Colv = "Rowv", cexCol = 0.8, scale = "none", main = "Heatmap of -log10(q-values)" ) ## ----eval=TRUE---------------------------------------------------------------- Effects <- c("Day", "Treatment", "Day:Treatment") for (i in 1:3) { Pval_log <- -log10(mat_pval[, Effects[i]]) resA <- resASCA[[Effects[i]]] PC12Load <- as.vector(sqrt(resA$loadings[, 1:2]^2 %*% resA$singvar[1:2]^2)) matres <- cbind(PC12Load, Pval_log) A[[i]] <- plotScatter( Y = matres, xy = c(1, 2), points_labs = rownames(matres), xlab = "PC1&2 ASCA loadings", ylab = "-log10(p-values)", title = paste(Effects[i], "effect") ) } A[[1]] A[[2]] A[[3]] ## ----------------------------------------------------------------------------- sessionInfo()