## ----style, eval=TRUE, echo=FALSE, results='asis'--------------------------
BiocStyle::latex()

## ----knitr, echo=FALSE, results='hide'-------------------------------------
library("knitr")
opts_chunk$set(tidy=FALSE,dev="pdf",fig.show="hide",
               fig.width=4,fig.height=4.5,
               dpi=300,# increase dpi to avoid pixelised pngs
               message=FALSE)

## ----options,results='hide',echo=FALSE-------------------------------------
options(digits=2, prompt=" ", continue=" ")

## ----systemFile------------------------------------------------------------
pythonScriptsDir = system.file( "python_scripts", package="DEXSeq" )
list.files(pythonScriptsDir)

## ----systemFileCheck,echo=FALSE,results='hide'-----------------------------
system.file( "python_scripts", package="DEXSeq", mustWork=TRUE )

## ----loadDEXSeq------------------------------------------------------------
inDir = system.file("extdata", package="pasilla")
countFiles = list.files(inDir, pattern="fb.txt$", full.names=TRUE)
basename(countFiles)
flattenedFile = list.files(inDir, pattern="gff$", full.names=TRUE)
basename(flattenedFile)

## ----sampleTable-----------------------------------------------------------
sampleTable = data.frame(
   row.names = c( "treated1", "treated2", "treated3", 
      "untreated1", "untreated2", "untreated3", "untreated4" ),
   condition = c("knockdown", "knockdown", "knockdown",  
      "control", "control", "control", "control" ),
   libType = c( "single-end", "paired-end", "paired-end", 
      "single-end", "single-end", "paired-end", "paired-end" ) )

## ----displaySampleTable----------------------------------------------------
sampleTable

## ----makeecs, eval=TRUE----------------------------------------------------
suppressPackageStartupMessages( library( "DEXSeq" ) )

dxd = DEXSeqDataSetFromHTSeq(
   countFiles,
   sampleData=sampleTable,
   design= ~ sample + exon + condition:exon,
   flattenedfile=flattenedFile )

## ----start-----------------------------------------------------------------
genesForSubset = read.table( 
  file.path(inDir, "geneIDsinsubset.txt"), 
  stringsAsFactors=FALSE)[[1]]

dxd = dxd[geneIDs( dxd ) %in% genesForSubset,]

## ----seeColData------------------------------------------------------------
colData(dxd)

## ----seeCounts-------------------------------------------------------------
head( counts(dxd), 5 )

## ----seeSplitted-----------------------------------------------------------
split( seq_len(ncol(dxd)), colData(dxd)$exon )

## ----seeCounts2------------------------------------------------------------
head( featureCounts(dxd), 5 )

## ----fData-----------------------------------------------------------------
head( rowRanges(dxd), 3 )

## ----pData-----------------------------------------------------------------
sampleAnnotation( dxd )

## ----sizeFactors1----------------------------------------------------------
dxd = estimateSizeFactors( dxd )

## ----estDisp1--------------------------------------------------------------
dxd = estimateDispersions( dxd )

## ----fitdiagnostics, dev='png', fig.show='asis', fig.small=TRUE, fig.cap='Fit Diagnostics. The initial per-exon dispersion estimates (shown by black points), the fitted mean-dispersion values function (red line), and the shrinked values in blue.\\label{figure/fitdiagnostics-1}'----
plotDispEsts( dxd )

## ----testForDEU1,cache=TRUE------------------------------------------------
dxd = testForDEU( dxd )

## ----estFC,cache=TRUE------------------------------------------------------
dxd = estimateExonFoldChanges( dxd, fitExpToVar="condition")

## ----results1,cache=TRUE---------------------------------------------------
dxr1 = DEXSeqResults( dxd )
dxr1

## ----results2,cache=TRUE---------------------------------------------------
mcols(dxr1)$description

## ----tallyExons------------------------------------------------------------
table ( dxr1$padj < 0.1 )

## ----tallyGenes------------------------------------------------------------
table ( tapply( dxr1$padj < 0.1, dxr1$groupID, any ) )

## ----MvsA, dev='png', fig.show='asis', fig.small=TRUE, fig.cap='MA plot. Mean expression versus $\\log_2$ fold change plot. Significant hits (at \\Robject{padj}<0.1) are coloured in red.\\label{figure/MvsA-1}'----
plotMA( dxr1, cex=0.8 )

## ----design----------------------------------------------------------------
sampleAnnotation(dxd)

## ----formulas2-------------------------------------------------------------
formulaFullModel    =  ~ sample + exon + libType:exon + condition:exon
formulaReducedModel =  ~ sample + exon + libType:exon 

## ----estDisps_again, cache=TRUE, results='hide'----------------------------
dxd = estimateDispersions( dxd, formula = formulaFullModel )

## ----test_again, cache=TRUE------------------------------------------------
dxd = testForDEU( dxd, 
	reducedModel = formulaReducedModel, 
        fullModel = formulaFullModel )

## ----res_again-------------------------------------------------------------
dxr2 = DEXSeqResults( dxd )

## ----table2----------------------------------------------------------------
table( dxr2$padj < 0.1 )

## ----table3----------------------------------------------------------------
table( before = dxr1$padj < 0.1, now = dxr2$padj < 0.1 )

## ----plot1, fig.height=8, fig.width=12-------------------------------------
plotDEXSeq( dxr2, "FBgn0010909", legend=TRUE, cex.axis=1.2, cex=1.3,
   lwd=2 )

## ----checkClaim,echo=FALSE-------------------------------------------------
wh = (dxr2$groupID=="FBgn0010909")
stopifnot(sum(dxr2$padj[wh] < formals(plotDEXSeq)$FDR)==1)

## ----plot2, fig.height=8, fig.width=12-------------------------------------
plotDEXSeq( dxr2, "FBgn0010909", displayTranscripts=TRUE, legend=TRUE,
   cex.axis=1.2, cex=1.3, lwd=2 )

## ----plot3, fig.height=8, fig.width=12-------------------------------------
plotDEXSeq( dxr2, "FBgn0010909", expression=FALSE, norCounts=TRUE,
   legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 )

## ----plot4, fig.height=8, fig.width=12-------------------------------------
plotDEXSeq( dxr2, "FBgn0010909", expression=FALSE, splicing=TRUE,
   legend=TRUE, cex.axis=1.2, cex=1.3, lwd=2 )

## ----DEXSeqHTML,cache=TRUE, eval=FALSE-------------------------------------
#  DEXSeqHTML( dxr2, FDR=0.1, color=c("#FF000080", "#0000FF80") )

## ----para1,cache=TRUE,results='hide', eval=FALSE---------------------------
#  BPPARAM = MultiCoreParam(workers=4)
#  dxd = estimateSizeFactors( dxd )
#  dxd = estimateDispersions( dxd, BPPARAM=BPPARAM)
#  dxd = testForDEU( dxd, BPPARAM=BPPARAM)
#  dxd = estimateExonFoldChanges(dxd, BPPARAM=BPPARAM)

## ----alldeu, cache=TRUE----------------------------------------------------
dxr = DEXSeq(dxd)
class(dxr)

## ----pergeneqval-----------------------------------------------------------
numbOfGenes <- sum( perGeneQValue(dxr) < 0.1)
numbOfGenes

## ----buildExonCountSetLoadPacks,cache=TRUE, eval=FALSE---------------------
#  library(GenomicRanges)
#  library(GenomicFeatures)
#  library(GenomicAlignments)

## ----buildExonCountSetDownloadAnno,cache=TRUE, eval=FALSE------------------
#  hse = makeTxDbFromBiomart( biomart="ensembl",
#     dataset="hsapiens_gene_ensembl" )

## ----buildExonCountSetDisjoin,cache=TRUE, eval=FALSE-----------------------
#  exonicParts = disjointExons( hse, aggregateGenes=FALSE )

## ----buildExonCountSet2FindBAMs,cache=TRUE, eval=FALSE---------------------
#  bamDir = system.file( "extdata", package="parathyroidSE",
#     mustWork=TRUE )
#  fls = list.files( bamDir, pattern="bam$", full=TRUE )

## ----buildExonCountSet2FindBAMs2,cache=TRUE, eval=FALSE--------------------
#  bamlst = BamFileList( fls, index=character(), yieldSize=100000,
#     obeyQname=TRUE )
#  SE = summarizeOverlaps( exonicParts, bamlst, mode="Union",
#     singleEnd=FALSE, ignore.strand=TRUE, inter.feature=FALSE,
#     fragments=TRUE )

## ----buildExonCountSet3,cache=TRUE, eval=FALSE-----------------------------
#  colData(SE)$condition = c("A", "A", "B")
#  DEXSeqDataSetFromSE( SE, design= ~ sample + exon + condition:exon )

## ----acc-------------------------------------------------------------------
head( geneIDs(dxd) )
head( exonIDs(dxd) )

## ----grmethods-------------------------------------------------------------
interestingRegion = GRanges( "chr2L", 
   IRanges(start=3872658, end=3875302) )
subsetByOverlaps( x=dxr, ranges=interestingRegion )
findOverlaps( query=dxr, subject=interestingRegion )

## ----sessionInfo-----------------------------------------------------------
sessionInfo()