--- title: "Processing and Analyzing RNA-Seq Data with `biotmle`" author: "[Nima Hejazi](https://nimahejazi.org)" date: "`r Sys.Date()`" output: BiocStyle::html_document bibliography: ../inst/REFERENCES.bib vignette: > %\VignetteIndexEntry{Processing and Analyzing RNA-Seq Data} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, message=FALSE, warning=FALSE} library(dplyr) library(biotmle) library(biotmleData) suppressMessages(library(SummarizedExperiment)) "%ni%" = Negate("%in%") ``` ## Introduction Here, we briefly work through how to use the `biotmle` package with data generated by next-generation sequencing technologies, which, in contrast to microarray technologies, produce measurements in the form of discrete counts. --- ## Simulation: Data Structure ```{r simNGS} set.seed(6423709) n <- 50 g <- 2500 cases_pois <- 50 controls_pois <- 10 ngs_cases <- as.data.frame(matrix(replicate(n, rpois(g, cases_pois)), g)) ngs_controls <- as.data.frame(matrix(replicate(n, rpois(g, controls_pois)), g)) ngs_data <- as.data.frame(cbind(ngs_cases, ngs_controls)) exp_var <- c(rep(1, n), rep(0, n)) batch <- rep(1:2, n) covar <- rep(1, n * 2) design <- as.data.frame(cbind(exp_var, batch, covar)) head(ngs_data[, 1:7]) ``` --- ## Processing: Data Transformation ```{r data_proc} se <- SummarizedExperiment(assays = list(counts = DataFrame(ngs_data)), colData = DataFrame(design)) se ``` --- ## Analysis: Assessing the Effect of Exposure ```{r biomarkertmle, eval=FALSE} rnaseqTMLEout <- biomarkertmle(se = se, varInt = 1, ngscounts = TRUE, parallel = TRUE, family = "gaussian", g_lib = c("SL.mean", "SL.glm", "SL.randomForest"), Q_lib = c("SL.mean", "SL.glm", "SL.randomForest", "SL.nnet") ) head(eif(rnaseqTMLEout)$E[, seq_len(6)]) ``` ```{r load_biomarkerTMLE_result, echo=FALSE} data(rnaseqtmleOut) head(eif(rnaseqTMLEout)$E[, seq_len(6)]) ``` ```{r modtest_ic} limmaTMLEout <- modtest_ic(biotmle = rnaseqTMLEout) head(toptable(limmaTMLEout)) ``` --- ## Results: Data Visualization ```{r pval_hist_limma_adjp} plot(x = limmaTMLEout, type = "pvals_adj") ``` ```{r pval_hist_limma_rawp} plot(x = limmaTMLEout, type = "pvals_raw") ``` ```{r heatmap_limma_results} varInt_index <- which(names(colData(se)) %in% "exp_var") designVar <- as.data.frame(colData(se))[, varInt_index] design <- as.numeric(designVar == max(designVar)) heatmap_ic(x = limmaTMLEout, row.dendrogram = TRUE, clustering.method = "hierarchical", design = design, FDRcutoff = 1.0, top = 10) ``` ```{r volcano_plot_limma_results} volcano_ic(biotmle = limmaTMLEout) ``` --- ## Session Information ```{r sessionInfo, echo=FALSE} sessionInfo() ```