## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, include = FALSE--------------------------------------------------- library(broadSeq) ## ----fig.cap = "Input and output data structures of different methods to idetify differentially expressed genes.", echo = FALSE---- knitr::include_graphics("img/io_p.png") ## ----echo = FALSE,fig.cap="Flowchart"----------------------------------------- knitr::include_graphics("img/broadSeq.png") ## ----eval = FALSE------------------------------------------------------------- # library(broadSeq) # library(ggplot2) ## ----message=FALSE------------------------------------------------------------ se <- readRDS(system.file("extdata","rat_vole_mouseSE_salmon.rds", package = "broadSeq")) SummarizedExperiment::assayNames(se) ## ----------------------------------------------------------------------------- as.data.frame(colData(se)) %>% dplyr::count(stage,species) %>% tidyr::spread(stage,n) se$stage <- factor(se$stage,levels = c("Bud","Cap","Late Cap","Bell")) ## ----lowExpr,warning=FALSE,fig.width= 6--------------------------------------- assays(se)[["counts"]][,5] %>% ggpubr::ggdensity(y = "count")+ ggplot2::geom_vline(xintercept = 10)+ggplot2::scale_x_log10() keep <- (assays(se)[["counts"]] >= 3) %>% rowSums() >= 5 # smallest Group Size is 5 table(keep) ## ----warning=FALSE,message=FALSE---------------------------------------------- se <- broadSeq::normalizeEdgerCPM(se ,method = "none",cpm.log = TRUE ) ## The normalized values are added with the assay name "logCPM" SummarizedExperiment::assayNames(se) ## ----warning=FALSE,message=FALSE---------------------------------------------- se <- broadSeq::normalizeEdgerCPM(se , method = "TMM", cpm.log = FALSE ) ## The normalized values are added with the assay name "TMM" SummarizedExperiment::assayNames(se) ## ----------------------------------------------------------------------------- assays(se)[["counts"]][1:5,1:5] assays(se)[["TMM"]][1:5,1:5] assays(se)[["logCPM"]][1:5,1:5] ## ----warning=FALSE------------------------------------------------------------ se <- broadSeq::transformDESeq2(se,method = "vst" ) ## ----------------------------------------------------------------------------- se <- broadSeq::transformDESeq2(se, method = "normTransform" ) ## ----eval=FALSE--------------------------------------------------------------- # se <- broadSeq::transformDESeq2(se, method = "rlog") ## ----------------------------------------------------------------------------- SummarizedExperiment::assayNames(se) ## ----warning=FALSE,fig.height=6,fig.width=8----------------------------------- p <- broadSeq::sampleAssay_plot(se[, se$species=="Mouse" ], assayName = "counts", fill = "stage", yscale = "log2")+ rremove("x.text") p1 <- broadSeq::sampleAssay_plot(se[, se$species=="Mouse"], assayName = "vst", fill = "stage")+ rremove("x.text") p2 <- broadSeq::sampleAssay_plot(se[, se$species=="Mouse"], assayName = "TMM", fill = "stage", yscale = "log10")+ rremove("x.text") p3 <- broadSeq::sampleAssay_plot(se[, se$species=="Mouse"], assayName = "logCPM", fill = "stage")+ rremove("x.text") ggarrange(p,p1,p2,p3, common.legend = TRUE, labels = c("A","B","C")) ## ----eval=FALSE--------------------------------------------------------------- # if (requireNamespace("vsn", quietly = TRUE)) { # library("vsn") # x <- meanSdPlot( # log2(assays(se[, se$species == "Rat"])[["counts"]]+1), # plot = FALSE) # print(x$gg +ggtitle(label = "log2(n+1) ")) # # x <- meanSdPlot( # assays(se[, se$species == "Rat"])[["vst"]], # plot = FALSE) # # print(x$gg +ggtitle(label = "Vst")) # # x <- meanSdPlot( # assays(se[, se$species == "Rat"])[["logCPM"]], # plot = FALSE) # print(x$gg + ggtitle(label = "logCPM")) # } # ## ----message=FALSE,warning=FALSE---------------------------------------------- ## Multiple assay of a single gene broadSeq::assay_plot(se, feature = c("Shh"), assayNames = c("counts","logCPM","vst","TMM"), x = "stage", fill="species", add="dotplot", palette = "npg") ## Expression of multiple genes from a single assay broadSeq::genes_plot(se, features = c("Shh","Edar"), facet.by = "symbol", x = "stage", assayName = "vst", fill="species", palette = "jco") ## ----warning=FALSE,message=FALSE,fig.height=6,fig.width=8--------------------- jco <- broadSeq::genes_plot(se[,se$species == "Mouse"], features = c("Shh"), facet.by = "symbol", assayName = "logCPM", x = "stage", fill="stage", add="dotplot", xlab = "", title = "Journal of Clinical Oncology", palette = "jco") npg <- broadSeq::genes_plot(se[,se$species == "Mouse"], features = c("Shh"), facet.by = "symbol",assayName = "logCPM", x = "stage", fill="stage", add="dotplot", xlab = "", title = "Nature Publishing Group", palette = "npg") aaas <- broadSeq::genes_plot(se[,se$species == "Mouse"], features = c("Shh"), facet.by = "symbol", assayName = "logCPM", x = "stage", fill="stage", add="dotplot", xlab = "", title = "Science", palette = "aaas") nejm <- broadSeq::genes_plot(se[,se$species == "Mouse"], features = c("Shh"), facet.by = "symbol", assayName = "logCPM", x = "stage", fill="stage", add="dotplot", xlab = "", title = "New England Journal of Medicine",palette = "nejm") # ggarrange(jco,npg,aaas,nejm, # common.legend = TRUE,legend = "none", # labels = c("A","B","C","D")) ggarrange(jco+ggpubr::rotate_x_text(), npg+ggpubr::rotate_x_text(), aaas+ggpubr::rotate_x_text(),nejm+ggpubr::rotate_x_text(), nrow = 1, common.legend = TRUE,legend = "none", labels = c("A","B","C","D")) %>% annotate_figure( top = text_grob("Color palette")) ## ----fig.height=6,fig.width=8------------------------------------------------- broadSeq::plot_MDS(se, scaledAssay = "vst", ntop=500, color = "species", shape = "stage", ellipse=TRUE, legend = "bottom") ## ----------------------------------------------------------------------------- head(rowData(se)) ## ----fig.height=6,fig.width=8------------------------------------------------- p_vst <- broadSeq::plotHeatmapCluster( se, scaledAssay = "vst", annotation_col = c("species", "stage"), annotation_row = c("Class","gene_biotype"), ntop = 30, show_geneAs = "symbol", cluster_cols = TRUE, cluster_rows = FALSE, show_rownames = TRUE, show_colnames = FALSE, main = "Top 30 variable gene vst" ) ## ----warning=FALSE------------------------------------------------------------ computedPCA_logCPM <- broadSeq::prcompTidy(se, scaledAssay = "logCPM", ntop = 500) ## PCA based on vst values computedPCA_vst <- broadSeq::prcompTidy(se, scaledAssay = "vst", ntop = 500) ## ----fig.width=8,fig.height=6------------------------------------------------- plotAnyPC(computedPCA = computedPCA_logCPM, x = 1, y = 2, color = "species", shape = "stage", legend = "bottom") ## ----fig.width=8,fig.height=6------------------------------------------------- pca_vst <- plotAnyPC(computedPCA = computedPCA_vst, x = 2, y = 3, color = "species", shape = "stage", legend = "bottom") ## ----fig.width=8,fig.height=6------------------------------------------------- computedPCA_vst$eigen_values %>% dplyr::filter(var_exp >= 2) %>% ggbarplot(x="PC",y="var_exp", label = TRUE, label.pos = "out") ## ----fig.width=8,fig.height=6------------------------------------------------- pca_vst_2_3 <-plotAnyPC(computedPCA = computedPCA_vst, x = 2, y = 3, color = "species", shape = "stage", legend = "bottom") # pca_vst_2_3 ## ----fig.width=7,fig.height=6------------------------------------------------- computedPCA_vst %>% broadSeq::getFeatureLoadRanking(keep = c("symbol","Class")) %>% head() # Top 5 genes of PC2 computedPCA_vst$loadings %>% top_n(5,abs(PC2) ) %>% dplyr::select(gene,PC2) pca_vst_loading <- computedPCA_vst %>% broadSeq::getFeatureLoadRanking(keep = c("symbol","Class"), topN = 50, pcs=1:10) %>% dplyr::count(Class, PC) %>% ggbarplot( x = "PC", y = "n", fill = "Class", legend = "bottom", palette = c("red","blue","orange","purple","white","grey") ) # pca_vst_loading ## ----fig.width=8,fig.height=6------------------------------------------------- # By default it plots top 2 genes from each PC axis pca_vst_bi <- broadSeq::biplotAnyPC(computedPCA = computedPCA_vst, x = 1, y = 2, genesLabel = "symbol", color = "species", shape = "stage", legend = "bottom") # pca_vst_bi ## ----fig.width=8,fig.height=8------------------------------------------------- ggarrange( ggarrange(pca_vst_bi+ggtitle(label = ""), pca_vst_2_3+ggtitle(label = ""), common.legend = TRUE), pca_vst_loading, nrow = 2) ## ----fig.width=8,fig.height=6------------------------------------------------- # Top 5 genes of PC3 biplotAnyPC(computedPCA = computedPCA_vst,x = 2, y = 3, color = "species", shape = "stage", genes= computedPCA_vst$loadings %>% top_n(5,abs(PC3)) %>% pull(gene), genesLabel = "symbol") ## Plot progression gene "Shh" biplotAnyPC(computedPCA = computedPCA_vst,x = 2, y = 3, color = "species", shape = "stage", genes=c("Shh"), genesLabel = "symbol") ## ----warning=FALSE,eval=TRUE-------------------------------------------------- se <- readRDS(system.file("extdata","rat_vole_mouseSE_salmon.rds", package = "broadSeq")) # To reduce the run time, subset of the data used here se <- se[,colData(se)$species == "Mouse"] ## ----warning=FALSE,eval=FALSE,echo=FALSE-------------------------------------- # count_matrix <- as.matrix(read.table(file = system.file("extdata", # "tooth_RNASeq_counts.txt", # package = "DELocal"))) # colData <- data.frame(condition=gsub("\\..*",x=colnames(count_matrix),replacement = "")) # # gene_location <- read.table(file = system.file("extdata", # "gene_location.txt", # package = "DELocal")) # # se <- SummarizedExperiment::SummarizedExperiment(assays=list(counts=count_matrix), # rowData = gene_location, # colData=colData) # # contrast= c("condition","ME13","ME14") ## ----warning=FALSE------------------------------------------------------------ head(rownames(se)) head(rowData(se)) ## ----warning=FALSE,eval=TRUE-------------------------------------------------- head(colData(se)) table(colData(se)$stage) ## ----warning=FALSE,fig.width=6,fig.height=6,message=FALSE--------------------- result_Noiseq <- use_NOIseq(se = se, colData_id = "stage", control = "Bud", treatment = "Cap", rank = TRUE, r = 10) # r is an argument of NOISeq::noiseqbio head(result_Noiseq) ## ----warning=FALSE,fig.width=8,fig.height=6,message=FALSE--------------------- pg <- broadSeq::genes_plot(se, x = "stage", assayName = "counts", features = result_Noiseq %>% dplyr::filter(rank <5) %>% rownames(), fill="stage", facet.by = "symbol", palette="jco", add = "dotplot")+rotate_x_text() pg_sc <- ggscatter(result_Noiseq, x="Bud_mean", y="Cap_mean",color = "prob")+ scale_x_log10()+scale_y_log10() pg+pg_sc ## ----warning=FALSE,results="hide", fig.keep = "none"-------------------------- # limma ?use_limma_trend(se, colData_id, control, treatment, rank = FALSE, ...) ?use_limma_voom(se, colData_id, control, treatment, rank = FALSE, ...) # edgeR ?use_edgeR_exact(se, colData_id, control, treatment, rank = FALSE, ...) ?use_edgeR_GLM(se, colData_id, control, treatment, rank = FALSE, ...) # deseq2 ?use_deseq2(se, colData_id, control, treatment, rank = FALSE, ...) # DELocal ?use_DELocal(se, colData_id, control, treatment, rank = FALSE, ...) # noiseq ?use_NOIseq(se, colData_id, control, treatment, rank = FALSE, ...) # EBSeq ?use_EBSeq(se, colData_id, control, treatment, rank = FALSE, ...) # samseq ?use_SAMseq(se, colData_id, control, treatment, rank = FALSE, ...) ## ----warning=FALSE,results="hide", fig.keep = "none"-------------------------- # First define a named list of functions funs <- list(limma_trend = use_limma_trend, limma_voom = use_limma_voom, edgeR_exact = use_edgeR_exact, edgeR_glm = use_edgeR_GLM, deseq2 = use_deseq2, DELocal = use_DELocal, noiseq = use_NOIseq, EBSeq = use_EBSeq) multi_result <- broadSeq::use_multDE( se = se, deFun_list = funs, return.df = TRUE, colData_id = "stage", control = "Bud", treatment = "Cap", rank = TRUE) ## ----warning=FALSE------------------------------------------------------------ head(multi_result) # nrow(multi_result) == nrow(se) colnames(multi_result) ## ----warning=FALSE,fig.width=6,fig.height=6,eval=TRUE------------------------- clusters <- multi_result %>% dplyr::select(ends_with("rank")) %>% t() %>% dist() %>% hclust() plot(clusters,main = "distance: Euclidean") ## ----warning=FALSE,fig.width=6,fig.height=6,eval=TRUE------------------------- multi_result %>% broadSeq::volcanoPlot( pValName = "deseq2_padj", lFCName = "deseq2_log2FoldChange", labelName = "symbol", palette = "lancet" , selectedLabel = multi_result %>% dplyr::arrange(deseq2_padj) %>% pull(symbol) %>% head() ) multi_result %>% broadSeq::volcanoPlot( pValName = "deseq2_padj", lFCName = "deseq2_log2FoldChange", labelName = "symbol", palette = c("purple","orange","grey"), selectedLabel = list(criteria = "(`x` > 5 | `x` < -2) & (`y` > 10)") ) +xlim(-7.5,7.5) ## ----warning=FALSE,error=FALSE------------------------------------------------ sessionInfo()