## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", crop = NULL ## Related to https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html ) ## ----vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE---------------- ## Track time spent on making the vignette startTime <- Sys.time() ## Bib setup library("RefManageR") ## Write bibliography information bib <- c( R = citation(), bsp2 = RefManageR::BibEntry( bibtype = "article", key = "bsp2", author = "Leonardo Collado-Torres and Emily E. Burke and Amy Peterson and JooHeon Shin and Richard E. Straub and Anandita Rajpurohit and Stephen A. Semick and William S. Ulrich and Amanda J. Price and Cristian Valencia and Ran Tao and Amy Deep-Soboslay and Thomas M. Hyde and Joel E. Kleinman and Daniel R. Weinberger and Andrew E. Jaffe", title = "Regional Heterogeneity in Gene Expression, Regulation, and Coherence in the Frontal Cortex and Hippocampus across Development and Schizophrenia", year = 2019, doi = "0.1016/j.neuron.2019.05.013", journal = "Neuron", month = "jul", publisher = "Elsevier BV", volume = "103", number = "2", pages = "203--216.e8", ), BiocFileCache = citation("BiocFileCache")[1], BiocStyle = citation("BiocStyle")[1], covr = citation("covr")[1], ggplot2 = citation("ggplot2")[1], knitr = citation("knitr")[1], limma = citation("limma")[1], RefManageR = citation("RefManageR")[1], rmarkdown = citation("rmarkdown")[1], sessioninfo = citation("sessioninfo")[1], testthat = citation("testthat")[1], qsvaR = citation("qsvaR")[1], SummarizedExperiment = citation("SummarizedExperiment")[1], sva = citation("sva")[1] ) ## ----"install", eval = FALSE-------------------------------------------------- # ## To install Bioconductor packages # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install("qsvaR") # # ## Check that you have a valid Bioconductor installation # BiocManager::valid() # # ## You can install the development version from GitHub with: # BiocManager::install("LieberInstitute/qsvaR") ## ----"citation"--------------------------------------------------------------- ## Citation info citation("qsvaR") ## ----"start", message=FALSE--------------------------------------------------- library("qsvaR") library("limma") library("BiocFileCache") ## ----DesignDiagram,fig.cap="The diagram on the right shows the 2016 experimental design for qSVA.The diagram on the right shows the 2022 experimental design for qSVA.", echo=FALSE---- knitr::include_graphics("../man/figures/qsva_experimental_design.png") ## ----"Download demo dataset"-------------------------------------------------- bfc <- BiocFileCache::BiocFileCache() ## Download brainseq phase 2 ## rse_file <- BiocFileCache::bfcrpath( "https://s3.us-east-2.amazonaws.com/libd-brainseq2/rse_tx_unfiltered.Rdata", x = bfc ) load(rse_file, verbose = TRUE) ## keep only adult samples and apply minimum expression cutoff rse_tx <- rse_tx[, rse_tx$Age > 17] rse_tx <- rse_tx[rowMeans(assays(rse_tx)$tpm) > 0.3, ] ## ----VennDiagram,fig.cap="The above venn diagram shows the overlap between transcripts in each of the previously mentioned models.", echo=FALSE---- knitr::include_graphics("../man/figures/transcripts_venn_diagramm.png") ## ----"Get Degraded Transcripts"----------------------------------------------- # obtain transcripts with degradation signature DegTx <- getDegTx( rse_tx, sig_transcripts = select_transcripts(cell_component = TRUE) ) dim(DegTx) ## ----"getPCs demo"------------------------------------------------------------ pcTx <- getPCs(rse_tx = DegTx, assayname = "tpm") ## ----"k_qsvs demo"------------------------------------------------------------ # design a basic model matrix to model the number of pcs needed mod <- model.matrix(~ Dx + Age + Sex + Race + Region, data = colData(rse_tx) ) ## To ensure that the results are reproducible, you will need to set a ## random seed with the set.seed() function. Internally, we are using ## sva::num.sv() which needs a random seed to ensure reproducibility of the ## results. set.seed(20230621) # use k qsvs function to return a integer of pcs needed k <- k_qsvs(rse_tx = DegTx, mod = mod, assayname = "tpm") print(k) ## ----"get_qsvs demo"---------------------------------------------------------- # get_qsvs use k to subset our pca analysis qsvs <- get_qsvs(qsvPCs = pcTx, k = k) dim(qsvs) ## ----"wrapper function"------------------------------------------------------- ## To ensure that the results are reproducible, you will need to set a ## random seed with the set.seed() function. Internally, we are using ## sva::num.sv() which needs a random seed to ensure reproducibility of the ## results. set.seed(20230621) ## Example use of the wrapper function qSVA() qsvs_wrapper <- qSVA( rse_tx = rse_tx, sig_transcripts = select_transcripts(cell_component = TRUE), mod = mod, assayname = "tpm" ) dim(qsvs_wrapper) ## ----"perform DE"------------------------------------------------------------- # create a model.matrix with demographic info and qsvs mod_qSVA <- cbind(mod, qsvs) # log tranform transcript expression txExprs <- log2(assays(rse_tx)$tpm + 1) # linear model differential expression fitTx <- lmFit(txExprs, mod_qSVA) # generate empirical bayes for DE eBTx <- eBayes(fitTx) # get DE results table sigTx <- topTable(eBTx, coef = 2, p.value = 1, number = nrow(rse_tx) ) head(sigTx) ## ----"DE plot"---------------------------------------------------------------- # get expression for most significant gene yy <- txExprs[rownames(txExprs) == rownames(sigTx)[1], ] ## Visualize the expression of this gene using boxplots p <- boxplot(yy ~ rse_tx$Dx, outline = FALSE, ylim = range(yy), ylab = "log2 Exprs", xlab = "", main = paste(rownames(sigTx)[1]) ) ## ----"perform DE without qSVs"------------------------------------------------ # log tranform transcript expression txExprs_noqsv <- log2(assays(rse_tx)$tpm + 1) # linear model differential expression with generic model fitTx_noqsv <- lmFit(txExprs_noqsv, mod) # generate empirical bayes for DE eBTx_noqsv <- eBayes(fitTx_noqsv) # get DE results table sigTx_noqsv <- topTable(eBTx_noqsv, coef = 2, p.value = 1, number = nrow(rse_tx) ) ## Explore the top results head(sigTx_noqsv) ## ----"subset to t-statistics"------------------------------------------------- ## Subset the topTable() results to keep just the t-statistic DE_noqsv <- data.frame(t = sigTx_noqsv$t, row.names = rownames(sigTx_noqsv)) DE <- data.frame(t = sigTx$t, row.names = rownames(sigTx)) ## Explore this data.frame() head(DE) ## ----"Generate DEqual without qSVs",fig.cap="Result of Differential Expression without qSVA normalization."---- ## Generate a DEqual() plot using the model results without qSVs DEqual(DE_noqsv) ## ----"Generate DEqual with qSVs",fig.cap="Result of Differential Expression with qSVA normalization."---- ## Generate a DEqual() plot using the model results with qSVs DEqual(DE) ## ----createVignette, eval=FALSE----------------------------------------------- # ## Create the vignette # library("rmarkdown") # system.time(render("Intro_qsvaR.Rmd", "BiocStyle::html_document")) # # ## Extract the R code # library("knitr") # knit("Intro_qsvaR.Rmd", tangle = TRUE) ## ----reproduce1, echo=FALSE--------------------------------------------------- ## Date the vignette was generated Sys.time() ## ----reproduce2, echo=FALSE--------------------------------------------------- ## Processing time in seconds totalTime <- diff(c(startTime, Sys.time())) round(totalTime, digits = 3) ## ----reproduce3, echo=FALSE------------------------------------------------------------------------------------------- ## Session info library("sessioninfo") options(width = 120) session_info() ## ----vignetteBiblio, results = "asis", echo = FALSE, warning = FALSE, message = FALSE--------------------------------- ## Print bibliography PrintBibliography(bib, .opts = list(hyperlink = "to.doc", style = "html"))