### R code from vignette source 'ChIPpeakAnno.Rnw' ################################################### ### code chunk number 1: style ################################################### BiocStyle::latex() ################################################### ### code chunk number 2: quickStart ################################################### library(ChIPpeakAnno) ## import the MACS output macs <- system.file("extdata", "MACS_peaks.xls", package="ChIPpeakAnno") macsOutput <- toGRanges(macs, format="MACS") ## annotate the peaks with ensembl annotation data(TSS.human.GRCh38) macs.anno <- annotatePeakInBatch(macsOutput, AnnotationData=TSS.human.GRCh38, output="overlapping", maxgap=5000L) ## add gene symbols library(org.Hs.eg.db) macs.anno <- addGeneIDs(annotatedPeak=macs.anno, orgAnn="org.Hs.eg.db", IDs2Add="symbol") head(macs.anno) if(interactive()){## annotate the peaks with UCSC annotation library(GenomicFeatures) library(TxDb.Hsapiens.UCSC.hg38.knownGene) ucsc.hg38.knownGene <- genes(TxDb.Hsapiens.UCSC.hg38.knownGene) macs.anno <- annotatePeakInBatch(macsOutput, AnnotationData=ucsc.hg38.knownGene, output="overlapping", maxgap=5000L) macs.anno <- addGeneIDs(annotatedPeak=macs.anno, orgAnn="org.Hs.eg.db", feature_id_type="entrez_id", IDs2Add="symbol") head(macs.anno) } ################################################### ### code chunk number 3: annotatePeakInBatch1 ################################################### library(ChIPpeakAnno) data(myPeakList) data(TSS.human.NCBI36) annotatedPeak <- annotatePeakInBatch(myPeakList[1:6,], AnnotationData=TSS.human.NCBI36) annotatedPeak ################################################### ### code chunk number 4: annotatePeakInBatch2 ################################################### myPeak1 <- GRanges(seqnames=c("1", "2", "3", "4", "5", "6", "2", "6", "6", "6", "6", "5"), ranges=IRanges(start=c(967654, 2010897, 2496704, 3075869, 3123260, 3857501, 201089, 1543200, 1557200, 1563000, 1569800, 167889600), end= c(967754, 2010997, 2496804, 3075969, 3123360, 3857601, 201089, 1555199, 1560599, 1565199, 1573799, 167893599), names=paste("Site", 1:12, sep=""))) TFbindingSites <- GRanges(seqnames=c("1", "2", "3", "4", "5", "6", "1", "2", "3", "4", "5", "6", "6", "6", "6", "6", "5"), ranges=IRanges(start=c(967659, 2010898, 2496700, 3075866, 3123260, 3857500, 96765, 201089, 249670, 307586, 312326, 385750, 1549800, 1554400, 1565000, 1569400, 167888600), end=c(967869, 2011108, 2496920, 3076166,3123470, 3857780, 96985, 201299, 249890, 307796, 312586, 385960, 1550599, 1560799, 1565399, 1571199, 167888999), names=paste("t", 1:17, sep="")), strand=c("+", "+", "+", "+", "+", "+", "-", "-", "-", "-", "-", "-", "+", "+", "+", "+", "+")) annotatedPeak2 <- annotatePeakInBatch(myPeak1, AnnotationData=TFbindingSites) annotatedPeak2 ################################################### ### code chunk number 5: pie1 ################################################### pie(table(as.data.frame(annotatedPeak2)$insideFeature)) ################################################### ### code chunk number 6: findOverlapsOfPeaks3 ################################################### peaks1 <- GRanges(seqnames=c("1", "2", "3", "4", "5", "6", "2", "6", "6", "6", "6", "5"), ranges=IRanges(start=c(967654, 2010897, 2496704, 3075869, 3123260, 3857501, 201089, 1543200, 1557200, 1563000, 1569800, 167889600), end= c(967754, 2010997, 2496804, 3075969, 3123360, 3857601, 201089, 1555199, 1560599, 1565199, 1573799, 167893599), names=paste("Site", 1:12, sep="")), strand="+") peaks2 <- GRanges(seqnames=c("1", "2", "3", "4", "5", "6", "1", "2", "3", "4", "5", "6", "6", "6", "6", "6", "5"), ranges=IRanges(start=c(967659, 2010898, 2496700, 3075866, 3123260, 3857500, 96765, 201089, 249670, 307586, 312326, 385750, 1549800, 1554400, 1565000, 1569400, 167888600), end=c(967869, 2011108, 2496920, 3076166,3123470, 3857780, 96985, 201299, 249890, 307796, 312586, 385960, 1550599, 1560799, 1565399, 1571199, 167888999), names=paste("t", 1:17, sep="")), strand=c("+", "+", "+", "+", "+", "+", "-", "-", "-", "-", "-", "-", "+", "+", "+", "+", "+")) ol <- findOverlapsOfPeaks(peaks1, peaks2, maxgap=1000) peaklist <- ol$peaklist ################################################### ### code chunk number 7: overlappingPeaks4 ################################################### overlappingPeaks <- ol$overlappingPeaks overlappingPeaks pie(table(overlappingPeaks[["peaks1///peaks2"]]$overlapFeature)) ################################################### ### code chunk number 8: overlappingPeaks5 ################################################### peaklist[["peaks1///peaks2"]] ################################################### ### code chunk number 9: 6 ################################################### peaklist[["peaks1"]] ################################################### ### code chunk number 10: 7 ################################################### peaklist[["peaks2"]] ################################################### ### code chunk number 11: findOverlapsOfPeaks8 ################################################### makeVennDiagram(ol, totalTest=1e+2) ################################################### ### code chunk number 12: VennerableFigure ################################################### # install.packages("Vennerable", repos="http://R-Forge.R-project.org", type="source") # library(Vennerable) # venn_cnt2venn <- function(venn_cnt){ # n <- which(colnames(venn_cnt)=="Counts") - 1 # SetNames=colnames(venn_cnt)[1:n] # Weight=venn_cnt[,"Counts"] # names(Weight) <- apply(venn_cnt[,1:n], 1, paste, collapse="") # Venn(SetNames=SetNames, Weight=Weight) # } # # v <- venn_cnt2venn(ol$venn_cnt) # plot(v) ################################################### ### code chunk number 13: findOverlapsOfPeaks9 ################################################### peaks3 <- GRanges(seqnames=c("1", "2", "3", "4", "5", "6", "1", "2", "3", "4"), ranges=IRanges(start=c(967859, 2010868, 2496500, 3075966, 3123460, 3851500, 96865, 201189, 249600, 307386), end= c(967969, 2011908, 2496720, 3076166, 3123470, 3857680, 96985, 201299, 249890, 307796), names=paste("p", 1:10, sep="")), strand=c("+", "+", "+", "+", "+", "+", "-", "-", "-", "-")) ol <- findOverlapsOfPeaks(peaks1, peaks2, peaks3, maxgap=1000, connectedPeaks="min") makeVennDiagram(ol, totalTest=1e+2) ################################################### ### code chunk number 14: makeVennDiagram10 ################################################### makeVennDiagram(list(peaks1, peaks2), NameOfPeaks=c("TF1", "TF2"), maxgap=0, minoverlap =1, totalTest=100) ################################################### ### code chunk number 15: makeVennDiagram11 ################################################### makeVennDiagram(list(peaks1, peaks2, peaks3), NameOfPeaks=c("TF1", "TF2", "TF3"), maxgap=0, minoverlap =1, totalTest=100) ################################################### ### code chunk number 16: getAllPeakSequence12 ################################################### peaks <- GRanges(seqnames=c("NC_008253", "NC_010468"), ranges=IRanges(start=c(100, 500), end=c(300, 600), names=c("peak1", "peak2"))) library(BSgenome.Ecoli.NCBI.20080805) peaksWithSequences <- getAllPeakSequence(peaks, upstream=20, downstream=20, genome=Ecoli) ################################################### ### code chunk number 17: write2FASTA13 ################################################### write2FASTA(peaksWithSequences,"test.fa") ################################################### ### code chunk number 18: getEnriched14 ################################################### library(org.Hs.eg.db) over <- getEnrichedGO(annotatedPeak, orgAnn="org.Hs.eg.db", maxP=0.01, multiAdj=FALSE, minGOterm=10, multiAdjMethod="") head(over[["bp"]]) head(over[["cc"]]) head(over[["mf"]]) ################################################### ### code chunk number 19: egOrgMap15 ################################################### egOrgMap("Mus musculus") egOrgMap("Homo sapiens") ################################################### ### code chunk number 20: peaksNearBDP16 ################################################### data(myPeakList) data(TSS.human.NCBI36) annotatedBDP <- peaksNearBDP(myPeakList[1:10,], AnnotationData=TSS.human.NCBI36, MaxDistance=5000, PeakLocForDistance="middle", FeatureLocForDistance="TSS") annotatedBDP$peaksWithBDP c(annotatedBDP$percentPeaksWithBDP, annotatedBDP$n.peaks, annotatedBDP$n.peaksWithBDP) ################################################### ### code chunk number 21: summarizePatternInPeaks17 ################################################### peaks <- GRanges(seqnames=c("NC_008253", "NC_010468"), ranges=IRanges(start=c(100, 500), end=c(300, 600), names=c("peak1", "peak2"))) filepath <- system.file("extdata", "examplePattern.fa", package="ChIPpeakAnno") library(BSgenome.Ecoli.NCBI.20080805) summarizePatternInPeaks(patternFilePath=filepath, format="fasta", skip=0L, BSgenomeName=Ecoli, peaks=peaks) ################################################### ### code chunk number 22: addGeneIDs18 ################################################### data(annotatedPeak) library(org.Hs.eg.db) addGeneIDs(annotatedPeak[1:6,], orgAnn="org.Hs.eg.db", IDs2Add=c("symbol")) addGeneIDs(annotatedPeak$feature[1:6], orgAnn="org.Hs.eg.db", IDs2Add=c("symbol")) ################################################### ### code chunk number 23: workflow19 ################################################### bed <- system.file("extdata", "MACS_output.bed", package="ChIPpeakAnno") gr1 <- toGRanges(bed, format="BED", header=FALSE) ## one can also try import from rtracklayer library(rtracklayer) gr1.import <- import(bed, format="BED") identical(start(gr1), start(gr1.import)) gr1[1:2] gr1.import[1:2] #note the name slot is different from gr1 gff <- system.file("extdata", "GFF_peaks.gff", package="ChIPpeakAnno") gr2 <- toGRanges(gff, format="GFF", header=FALSE, skip=3) ol <- findOverlapsOfPeaks(gr1, gr2) makeVennDiagram(ol) ################################################### ### code chunk number 24: workflow20 ################################################### pie(table(ol$overlappingPeaks[["gr1///gr2"]]$overlapFeature)) ################################################### ### code chunk number 25: workflow21 ################################################### data(TSS.human.GRCh37) overlaps <- ol$peaklist[["gr1///gr2"]] overlaps.anno <- annotatePeakInBatch(overlaps, AnnotationData=TSS.human.GRCh37, output="overlapping", maxgap=5000L) overlaps.anno <- addGeneIDs(overlaps.anno, "org.Hs.eg.db", "symbol") head(overlaps.anno) ################################################### ### code chunk number 26: workflow22 ################################################### gr1.copy <- gr1 gr1.copy$score <- 1 binOverFeature(gr1, gr1.copy, annotationData=TSS.human.GRCh37, radius=5000, nbins=10, FUN=c(sum, length), ylab=c("score", "count"), main=c("Distribution of aggregated peak score around TSS", "Distribution of aggregated peak numbers around TSS")) ################################################### ### code chunk number 27: workflow23 ################################################### if(require(TxDb.Hsapiens.UCSC.hg19.knownGene)){ aCR<-assignChromosomeRegion(gr1, nucleotideLevel=FALSE, precedence=c("Promoters", "immediateDownstream", "fiveUTRs", "threeUTRs", "Exons", "Introns"), TxDb=TxDb.Hsapiens.UCSC.hg19.knownGene) barplot(aCR$percentage) } ################################################### ### code chunk number 28: sessionInfo ################################################### toLatex(sessionInfo())