## ----style, eval=TRUE, echo=FALSE, results='asis'-------------------------- BiocStyle::latex() ## ----knitr, echo=FALSE, results='hide'------------------------------------- library("knitr") opts_chunk$set(tidy=FALSE,dev="pdf",fig.show="hide", fig.width=4,fig.height=4.5, dpi=300,# increase dpi to avoid pixelised pngs message=FALSE) ## ----options,results='hide',echo=FALSE------------------------------------- options(digits=2, prompt=" ", continue=" ") ## ----systemFile------------------------------------------------------------ pythonScriptsDir = system.file( "python_scripts", package="DEXSeq" ) list.files(pythonScriptsDir) ## ----systemFileCheck,echo=FALSE,results='hide'----------------------------- system.file( "python_scripts", package="DEXSeq", mustWork=TRUE ) ## ----loadDEXSeq------------------------------------------------------------ inDir = system.file("extdata", package="pasilla") countFiles = list.files(inDir, pattern="fb.txt$", full.names=TRUE) basename(countFiles) flattenedFile = list.files(inDir, pattern="gff$", full.names=TRUE) basename(flattenedFile) ## ----sampleTable----------------------------------------------------------- sampleTable = data.frame( row.names = c( "treated1", "treated2", "treated3", "untreated1", "untreated2", "untreated3", "untreated4" ), condition = c("knockdown", "knockdown", "knockdown", "control", "control", "control", "control" ), libType = c( "single-end", "paired-end", "paired-end", "single-end", "single-end", "paired-end", "paired-end" ) ) ## ----displaySampleTable---------------------------------------------------- sampleTable ## ----makeecs, eval=TRUE---------------------------------------------------- suppressPackageStartupMessages( library( "DEXSeq" ) ) dxd = DEXSeqDataSetFromHTSeq( countFiles, sampleData=sampleTable, design= ~ sample + exon + condition:exon, flattenedfile=flattenedFile ) ## ----start----------------------------------------------------------------- genesForSubset = read.table( file.path(inDir, "geneIDsinsubset.txt"), stringsAsFactors=FALSE)[[1]] dxd = dxd[geneIDs( dxd ) %in% genesForSubset,] ## ----seeColData------------------------------------------------------------ colData(dxd) ## ----seeCounts------------------------------------------------------------- head( counts(dxd), 5 ) ## ----seeSplitted----------------------------------------------------------- split( seq_len(ncol(dxd)), colData(dxd)$exon ) ## ----seeCounts2------------------------------------------------------------ head( featureCounts(dxd), 5 ) ## ----fData----------------------------------------------------------------- head( rowRanges(dxd), 3 ) ## ----pData----------------------------------------------------------------- sampleAnnotation( dxd ) ## ----sizeFactors1---------------------------------------------------------- dxd = estimateSizeFactors( dxd ) ## ----estDisp1-------------------------------------------------------------- dxd = estimateDispersions( dxd ) ## ----fitdiagnostics, dev='png', fig.show='asis', fig.small=TRUE, fig.cap='Fit Diagnostics. The initial per-exon dispersion estimates (shown by black points), the fitted mean-dispersion values function (red line), and the shrinked values in blue.\\label{figure/fitdiagnostics-1}'---- plotDispEsts( dxd ) ## ----testForDEU1,cache=TRUE------------------------------------------------ dxd = testForDEU( dxd ) ## ----estFC,cache=TRUE------------------------------------------------------ dxd = estimateExonFoldChanges( dxd, fitExpToVar="condition") ## ----results1,cache=TRUE--------------------------------------------------- dxr1 = DEXSeqResults( dxd ) dxr1 ## ----results2,cache=TRUE--------------------------------------------------- mcols(dxr1)$description ## ----tallyExons------------------------------------------------------------ table ( dxr1$padj < 0.1 ) ## ----tallyGenes------------------------------------------------------------ table ( tapply( dxr1$padj < 0.1, dxr1$groupID, any ) ) ## ----MvsA, dev='png', fig.show='asis', fig.small=TRUE, fig.cap='MA plot. Mean expression versus $\\log_2$ fold change plot. Significant hits (at \\Robject{padj}<0.1) are coloured in red.\\label{figure/MvsA-1}'---- plotMA( dxr1, cex=0.8 ) ## ----design---------------------------------------------------------------- sampleAnnotation(dxd) ## ----formulas2------------------------------------------------------------- formulaFullModel = ~ sample + exon + libType:exon + condition:exon formulaReducedModel = ~ sample + exon + libType:exon ## ----estDisps_again, cache=TRUE, results='hide'---------------------------- dxd = estimateDispersions( dxd, formula = formulaFullModel ) ## ----test_again, cache=TRUE------------------------------------------------ dxd = testForDEU( dxd, reducedModel = formulaReducedModel, fullModel = formulaFullModel ) ## ----res_again------------------------------------------------------------- dxr2 = DEXSeqResults( dxd ) ## ----table2---------------------------------------------------------------- table( dxr2$padj < 0.1 ) ## ----table3---------------------------------------------------------------- table( before = dxr1$padj < 0.1, now = dxr2$padj < 0.1 ) ## ----plot1, fig.height=8, fig.width=12------------------------------------- plotDEXSeq( dxr2, "FBgn0010909", legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 ) ## ----checkClaim,echo=FALSE------------------------------------------------- wh = (dxr2$groupID=="FBgn0010909") stopifnot(sum(dxr2$padj[wh] < formals(plotDEXSeq)$FDR)==1) ## ----plot2, fig.height=8, fig.width=12------------------------------------- plotDEXSeq( dxr2, "FBgn0010909", displayTranscripts=TRUE, legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 ) ## ----plot3, fig.height=8, fig.width=12------------------------------------- plotDEXSeq( dxr2, "FBgn0010909", expression=FALSE, norCounts=TRUE, legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 ) ## ----plot4, fig.height=8, fig.width=12------------------------------------- plotDEXSeq( dxr2, "FBgn0010909", expression=FALSE, splicing=TRUE, legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 ) ## ----DEXSeqHTML,cache=TRUE, eval=FALSE------------------------------------- # DEXSeqHTML( dxr2, FDR=0.1, color=c("#FF000080", "#0000FF80") ) ## ----para1,cache=TRUE,results='hide', eval=FALSE--------------------------- # BPPARAM = MultiCoreParam(workers=4) # dxd = estimateSizeFactors( dxd ) # dxd = estimateDispersions( dxd, BPPARAM=BPPARAM) # dxd = testForDEU( dxd, BPPARAM=BPPARAM) # dxd = estimateExonFoldChanges(dxd, BPPARAM=BPPARAM) ## ----alldeu, cache=TRUE---------------------------------------------------- dxr = DEXSeq(dxd) class(dxr) ## ----pergeneqval----------------------------------------------------------- numbOfGenes <- sum( perGeneQValue(dxr) < 0.1) numbOfGenes ## ----buildExonCountSetLoadPacks,cache=TRUE, eval=FALSE--------------------- # library(GenomicRanges) # library(GenomicFeatures) # library(GenomicAlignments) ## ----buildExonCountSetDownloadAnno,cache=TRUE, eval=FALSE------------------ # hse = makeTxDbFromBiomart( biomart="ensembl", # dataset="hsapiens_gene_ensembl" ) ## ----buildExonCountSetDisjoin,cache=TRUE, eval=FALSE----------------------- # exonicParts = disjointExons( hse, aggregateGenes=FALSE ) ## ----buildExonCountSet2FindBAMs,cache=TRUE, eval=FALSE--------------------- # bamDir = system.file( "extdata", package="parathyroidSE", # mustWork=TRUE ) # fls = list.files( bamDir, pattern="bam$", full=TRUE ) ## ----buildExonCountSet2FindBAMs2,cache=TRUE, eval=FALSE-------------------- # bamlst = BamFileList( fls, index=character(), yieldSize=100000, # obeyQname=TRUE ) # SE = summarizeOverlaps( exonicParts, bamlst, mode="Union", # singleEnd=FALSE, ignore.strand=TRUE, inter.feature=FALSE, # fragments=TRUE ) ## ----buildExonCountSet3,cache=TRUE, eval=FALSE----------------------------- # colData(SE)$condition = c("A", "A", "B") # DEXSeqDataSetFromSE( SE, design= ~ sample + exon + condition:exon ) ## ----acc------------------------------------------------------------------- head( geneIDs(dxd) ) head( exonIDs(dxd) ) ## ----grmethods------------------------------------------------------------- interestingRegion = GRanges( "chr2L", IRanges(start=3872658, end=3875302) ) subsetByOverlaps( x=dxr, ranges=interestingRegion ) findOverlaps( query=dxr, subject=interestingRegion ) ## ----sessionInfo----------------------------------------------------------- sessionInfo()