### R code from vignette source 'DESeq2_parathyroid.Rnw'

###################################################
### code chunk number 1: options
###################################################
options(digits=3, width=80)
library("cacheSweave")
setCacheDir("cachedir")


###################################################
### code chunk number 2: libraries
###################################################
library( "DESeq2" )
library( "parathyroidSE" )


###################################################
### code chunk number 3: loadEcs
###################################################
data("parathyroidGenesSE")


###################################################
### code chunk number 4: countTable
###################################################
head( assay( parathyroidGenesSE ) )


###################################################
### code chunk number 5: nrowSE
###################################################
nrow(parathyroidGenesSE)


###################################################
### code chunk number 6: colData
###################################################
colData( parathyroidGenesSE )


###################################################
### code chunk number 7: rowData
###################################################
rowData( parathyroidGenesSE )


###################################################
### code chunk number 8: columnData
###################################################
as.data.frame( colData(parathyroidGenesSE)[,c("sample","patient","treatment","time")] )


###################################################
### code chunk number 9: split
###################################################
allColSamples <- colData(parathyroidGenesSE)$sample
sp <- split( seq(along=allColSamples), colData(parathyroidGenesSE)$sample )


###################################################
### code chunk number 10: addSamples
###################################################
countdata <- sapply(sp, function(columns) 
   rowSums( assay(parathyroidGenesSE)[,columns,drop=FALSE] ) )
head(countdata)


###################################################
### code chunk number 11: poormanssum
###################################################
a <- assay(parathyroidGenesSE)
countdata2 <- cbind( a[,1:8], a[,9]+a[,10], a[,11], 
   a[,12]+a[,13], a[,14:22], a[,23]+a[,24], a[,25], a[,26]+a[,27] )
all( countdata == countdata2 )


###################################################
### code chunk number 12: subsetMetaData
###################################################
coldata <- colData(parathyroidGenesSE)[sapply(sp, `[`, 1),]
rownames(coldata) <- coldata$sample
coldata


###################################################
### code chunk number 13: colDataSubsetCols
###################################################
coldata <- coldata[ , c( "patient", "treatment", "time" ) ]
head( coldata )


###################################################
### code chunk number 14: rowdata
###################################################
rowdata <- rowData(parathyroidGenesSE)
rowdata


###################################################
### code chunk number 15: makeddsfull
###################################################
ddsFull <- DESeqDataSetFromMatrix(
  countData = countdata,
  colData = coldata,
  design = ~ patient + treatment,
  rowData = rowdata)
ddsFull  


###################################################
### code chunk number 16: testCountSum
###################################################
stopifnot(sum(assay(parathyroidGenesSE)) == sum(counts(ddsFull)))


###################################################
### code chunk number 17: subset
###################################################
dds <- ddsFull[ , colData(ddsFull)$treatment %in% c("Control","DPN") & 
                  colData(ddsFull)$time == "48h" ]


###################################################
### code chunk number 18: refactor
###################################################
dds$patient <- factor(dds$patient)
dds$treatment <- factor(dds$treatment)


###################################################
### code chunk number 19: relevel
###################################################
dds$treatment <- relevel( dds$treatment, "Control" )


###################################################
### code chunk number 20: multifactorColData
###################################################
colData(dds)


###################################################
### code chunk number 21: runDESeq
###################################################
dds <- DESeq(dds)


###################################################
### code chunk number 22: extractResults
###################################################
res <- results(dds)
res


###################################################
### code chunk number 23: resCols
###################################################
mcols(res)


###################################################
### code chunk number 24: checkBaseMean
###################################################
all.equal(res$baseMean, rowMeans(counts(dds)))
all.equal(res$baseMean, rowMeans(counts(dds,normalized=TRUE)))


###################################################
### code chunk number 25: rawpvalue
###################################################
sum( res$pvalue < 0.05, na.rm=TRUE )
sum( is.na(res$pvalue) )


###################################################
### code chunk number 26: adjpvalue
###################################################
sum( res$padj < 0.1, na.rm=TRUE )


###################################################
### code chunk number 27: strongGenes
###################################################
resSig <- res[ which(res$padj < 0.1 ), ]
head( resSig[ order( resSig$log2FoldChange ), ] )


###################################################
### code chunk number 28: strongGenesUp
###################################################
tail( resSig[ order( resSig$log2FoldChange ), ] )


###################################################
### code chunk number 29: proportionSigGenes
###################################################
table(sign(resSig$log2FoldChange))


###################################################
### code chunk number 30: plotMA1
###################################################
plotMA(dds, ylim = c( -1.5, 1.5 ) )


###################################################
### code chunk number 31: dispPlot
###################################################
plotDispEsts( dds )


###################################################
### code chunk number 32: plotMApadjchange
###################################################
plotMA(dds, pvalCutoff = 0.5, ylim = c( -1.5, 1.5) )


###################################################
### code chunk number 33: pvalHist
###################################################
hist( res$pvalue, breaks=100     )


###################################################
### code chunk number 34: filter
###################################################
filterThreshold <- 2.0
keep <- rowMeans( counts( dds, normalized=TRUE ) ) > filterThreshold
table( keep )


###################################################
### code chunk number 35: adjFilter
###################################################
min( res$padj[!keep], na.rm=TRUE )


###################################################
### code chunk number 36: padjFilterCmp
###################################################
table( p.adjust( res$pvalue, method="BH" ) < .1 )
table( p.adjust( res$pvalue[keep], method="BH" ) < .1 )


###################################################
### code chunk number 37: pvalHistFilt
###################################################
hist( res$pvalue[keep], breaks=100 )


###################################################
### code chunk number 38: loadOrg
###################################################
library( "org.Hs.eg.db" )


###################################################
### code chunk number 39: keyType
###################################################
cols(org.Hs.eg.db)


###################################################
### code chunk number 40: convertIDs
###################################################
convertIDs <- function( ids, fromKey, toKey, db, ifMultiple=c( "putNA", "useFirst" ) ) {
   stopifnot( inherits( db, "AnnotationDb" ) )
   ifMultiple <- match.arg( ifMultiple )
   suppressWarnings( selRes <- AnnotationDbi::select( 
      db, keys=ids, keytype=fromKey, cols=c(fromKey,toKey) ) )
   if( ifMultiple == "putNA" ) {
      duplicatedIds <- selRes[ duplicated( selRes[,1] ), 1 ]   
      selRes <- selRes[ ! selRes[,1] %in% duplicatedIds, ] }
   return( selRes[ match( ids, selRes[,1] ), 2 ] )
}


###################################################
### code chunk number 41: addSymbols
###################################################
res$symbol <- convertIDs( row.names(res), "ENSEMBL", "SYMBOL", org.Hs.eg.db )
res


###################################################
### code chunk number 42: writeCSV
###################################################
write.csv( as.data.frame(res), file="results.csv" )


###################################################
### code chunk number 43: loadReactome
###################################################
library( "reactome.db" )


###################################################
### code chunk number 44: addEntrez
###################################################
res$entrez <- convertIDs( row.names(res), "ENSEMBL", "ENTREZID", org.Hs.eg.db )


###################################################
### code chunk number 45: subsetTores2
###################################################
res2 <- res[ res$entrez %in% keys( reactome.db, "ENTREZID" ) & 
   !is.na( res$pvalue) , ]
head(res2)


###################################################
### code chunk number 46: queryReactome
###################################################
reactomeTable <- AnnotationDbi::select( reactome.db, keys=res2$entrez, 
   keytype="ENTREZID", cols=c("ENTREZID","REACTOMEID") )
head(reactomeTable)


###################################################
### code chunk number 47: CreateIncidenceMatrix
###################################################
incm <- do.call( rbind, with(reactomeTable, tapply( 
   ENTREZID, factor(REACTOMEID), function(x) res2$entrez %in% x ) ))
colnames(incm) <- res2$entrez

str(incm)


###################################################
### code chunk number 48: PruneIncm
###################################################
incm <- incm[ rowSums(incm) >= 5, ]


###################################################
### code chunk number 49: testFun
###################################################
testCategory <- function( reactomeID ) {
  isMember <- incm[ reactomeID, ]
  data.frame( 
     reactomeID = reactomeID,
     numGenes = sum( isMember ),
     avgLFC = mean( res2$log2FoldChange[isMember] ),
     strength = sum( res2$log2FoldChange[isMember] ) / sqrt(sum(isMember)),
     pvalue = t.test( res2$log2FoldChange[ isMember ] )$p.value,
     reactomeName = reactomePATHID2NAME[[reactomeID]] ) }


###################################################
### code chunk number 50: testTestFun
###################################################
testCategory("109581")


###################################################
### code chunk number 51: runGSEA
###################################################
reactomeResult <- do.call( rbind, lapply( rownames(incm), testCategory ) )


###################################################
### code chunk number 52: padjGSEA
###################################################
reactomeResult$padjust <- p.adjust( reactomeResult$pvalue, "BH" )


###################################################
### code chunk number 53: GSEAresult
###################################################
reactomeResultSignif <- reactomeResult[ reactomeResult$padjust < 0.05, ]
reactomeResultSignif[ order(reactomeResultSignif$strength), ]


###################################################
### code chunk number 54: topDEGene
###################################################
deGeneID <- "ENSG00000099194"
res[deGeneID,]
deGene <- range(rowData(dds[deGeneID,])[[1]])
names(deGene) <- deGeneID
deGene


###################################################
### code chunk number 55: defineRange
###################################################
as.character(seqnames(deGene))
ucscChrom <- paste0("chr",as.character(seqnames(deGene)))
ucscRanges <- ranges(flank(deGene,width=10e6,both=TRUE))
subsetRange <- GRanges(ucscChrom, ucscRanges)
subsetRange


###################################################
### code chunk number 56: downloadERBS (eval = FALSE)
###################################################
## ##
## ## Please do not run this code if you do not have an internet connection, 
## ## alternatively use the local file import in the next code chunk.
## ##
## library( "rtracklayer" )
## trackName <- "wgEncodeRegTfbsClusteredV2"
## tableName <- "wgEncodeRegTfbsClusteredV2"
## trFactor <- "ERalpha_a"
## mySession <- browserSession()
## ucscTable <- getTable(ucscTableQuery(mySession, track=trackName,
##                                      range=subsetRange, table=tableName,
##                                      name=trFactor))


###################################################
### code chunk number 57: localERBS
###################################################
ucscTableFile <- "localUcscTable.csv"
ucscTable <- read.csv(ucscTableFile, stringsAsFactors=FALSE)


###################################################
### code chunk number 58: ERBS2Peaks
###################################################
peaks <- with(ucscTable, GRanges(chrom, IRanges(chromStart, chromEnd), 
                                 score=score))
seqlevels(peaks) <- gsub("chr(.+)","\\1",seqlevels(peaks))
seqlevels(peaks) <- seqlevels(deGene)


###################################################
### code chunk number 59: nearestPeakDo
###################################################
# suppress warning which exists for BioC 2.12 of changes to distance()
suppressWarnings({d2nearest <- distanceToNearest(deGene, peaks)})


###################################################
### code chunk number 60: nearestPeakShow (eval = FALSE)
###################################################
## d2nearest <- distanceToNearest(deGene, peaks)


###################################################
### code chunk number 61: distanceShow (eval = FALSE)
###################################################
## distance(deGene, peaks)


###################################################
### code chunk number 62: distanceDo
###################################################
suppressWarnings({distance(deGene, peaks)})


###################################################
### code chunk number 63: distanceToNearest
###################################################
d2nearest


###################################################
### code chunk number 64: nearestPeakOut
###################################################
deGene
peaks[subjectHits(d2nearest)]


###################################################
### code chunk number 65: plotPeaksAndGene
###################################################
plotRange <- start(deGene) + 1e6 * c(-1,1)
peakNearest <- ( seq_along(peaks) == subjectHits(d2nearest) )
plot(x=start(peaks), y=ifelse(peakNearest,.3,.2),
     ylim=c(0,1), xlim=plotRange, pch='p',
     col=ifelse(peakNearest,"red","grey60"),
     yaxt="n", ylab="",
     xlab=paste("2 Mb on chromosome",as.character(seqnames(deGene))))
points(x=start(deGene),y=.8,pch='g')


###################################################
### code chunk number 66: calculateSumForCaption
###################################################
plotGRange <- GRanges(seqnames(peaks[1]),IRanges(plotRange[1],plotRange[2]))
numPeaksInPlotRange <- sum(peaks %over% plotGRange)


###################################################
### code chunk number 67: peakDists
###################################################
  peakDists <- diff(sort(start(peaks)))
  summary(peakDists)
  mean(peakDists)


###################################################
### code chunk number 68: rld
###################################################
rld <- rlogTransformation(dds)
head( assay(rld) )


###################################################
### code chunk number 69: rldPlot
###################################################
par( mfrow = c( 1, 2 ) )
plot( log2( 1+counts(dds, normalized=TRUE)[, 1:2] ), col="#00000020", pch=20, cex=0.3 )
plot( assay(rld)[, 1:2], col="#00000020", pch=20, cex=0.3 )


###################################################
### code chunk number 70: euclDist
###################################################
sampleDists <- dist( t( assay(rld) ) )
sampleDists


###################################################
### code chunk number 71: sampleDistHeatmap
###################################################
sampleDistMatrix <- as.matrix( sampleDists )
rownames(sampleDistMatrix) <- paste( colData(rld)$treatment, 
   colData(rld)$patient, sep="-" )
   
library( "gplots" )   
heatmap.2( sampleDistMatrix, trace="none" )


###################################################
### code chunk number 72: betterheatmap
###################################################
library("RColorBrewer")  
colours = colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
heatmap.2( sampleDistMatrix, trace="none", col=colours)


###################################################
### code chunk number 73: samplePCA
###################################################
print( plotPCA( rld, intgroup = c( "patient", "treatment") ) )


###################################################
### code chunk number 74: topVarGenes
###################################################
library( "genefilter" )
topVarGenes <- head( order( rowVars( assay(rld) ), decreasing=TRUE ), 35 )


###################################################
### code chunk number 75: geneHeatmap
###################################################
heatmap.2( assay(rld)[ topVarGenes, ], scale="row", 
     trace="none", dendrogram="column", 
     col = colorRampPalette( rev(brewer.pal(9, "RdBu")) )(255))


###################################################
### code chunk number 76: sessInfo
###################################################
sessionInfo()


###################################################
### code chunk number 77: relocateAnswers
###################################################
relocateAnswers = function(con="DESeq2_parathyroid.tex"){
  txt = readLines(con=con)
  i1 = grep("\\begin{answer}", txt, fixed=TRUE)
  i2 = grep("\\end{answer}",  txt, fixed=TRUE)
  if(!((length(i1)==length(i2))&&all(i2>i1))) stop("\\begin{answer}/\\end{answer} macros are unbalanced.")
  i = unlist(mapply(`:`, i1, i2))
  res = txt[-i]
  sol = txt[i]
  dest = grep("\\section{Solutions}", res, fixed=TRUE)
  if(length(dest)!=1) stop("\\section{Solutions} not found exactly once.")
  res = c(res[1:dest], sol, res[(dest+1):length(res)])
  writeLines(res, con=con)
}