## ---- echo=FALSE, results="hide", warning=FALSE, message=FALSE------------- suppressPackageStartupMessages({ library(ATACseqQC) library(ChIPpeakAnno) library(BSgenome.Hsapiens.UCSC.hg19) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(phastCons100way.UCSC.hg19) library(MotifDb) library(GenomicAlignments) }) knitr::opts_chunk$set(warning=FALSE, message=FALSE) ## ---- eval=FALSE----------------------------------------------------------- # library(BiocInstaller) # biocLite(c("ATACseqQC", "ChIPpeakAnno", "MotifDb", "GenomicAlignments", # "BSgenome.Hsapiens.UCSC.hg19", "TxDb.Hsapiens.UCSC.hg19.knownGene", # "phastCons100way.UCSC.hg19")) ## -------------------------------------------------------------------------- ## load the library library(ATACseqQC) ## input the bamFile from the ATACseqQC package bamfile <- system.file("extdata", "GL1.bam", package="ATACseqQC", mustWork=TRUE) bamfile.labels <- gsub(".bam", "", basename(bamfile)) ## ---- eval=FALSE----------------------------------------------------------- # source(system.file("extdata", "IGVSnapshot.R", package = "ATACseqQC")) ## -------------------------------------------------------------------------- #bamQC(bamfile, outPath=NULL) estimateLibComplexity(readsDupFreq(bamfile)) ## -------------------------------------------------------------------------- ## generate fragement size distribution fragSize <- fragSizeDist(bamfile, bamfile.labels) ## -------------------------------------------------------------------------- ## bamfile tags to be read in tags <- c("AS", "XN", "XM", "XO", "XG", "NM", "MD", "YS", "YT") ## files will be output into outPath outPath <- "splited" dir.create(outPath) ## shift the coordinates of 5'ends of alignments in the bam file library(BSgenome.Hsapiens.UCSC.hg19) seqlev <- "chr1" ## subsample data for quick run which <- as(seqinfo(Hsapiens)[seqlev], "GRanges") gal <- readBamFile(bamfile, tag=tags, which=which, asMates=TRUE) gal1 <- shiftGAlignmentsList(gal) shiftedBamfile <- file.path(outPath, "shifted.bam") export(gal1, shiftedBamfile) ## -------------------------------------------------------------------------- library(TxDb.Hsapiens.UCSC.hg19.knownGene) txs <- transcripts(TxDb.Hsapiens.UCSC.hg19.knownGene) pt <- PTscore(gal1, txs) plot(pt$log2meanCoverage, pt$PT_score, xlab="log2 mean coverage", ylab="Promoter vs Transcript") ## -------------------------------------------------------------------------- nfr <- NFRscore(gal1, txs) plot(nfr$log2meanCoverage, nfr$NFR_score, xlab="log2 mean coverage", ylab="Nucleosome Free Regions score", main="NFRscore for 200bp flanking TSSs", xlim=c(-10, 0), ylim=c(-5, 5)) ## -------------------------------------------------------------------------- library(phastCons100way.UCSC.hg19) ## run program for chromosome 1 only txs <- txs[seqnames(txs) %in% "chr1"] genome <- Hsapiens ## split the reads into NucleosomeFree, mononucleosome, ## dinucleosome and trinucleosome. objs <- splitGAlignmentsByCut(gal1, txs=txs, genome=genome, conservation=phastCons100way.UCSC.hg19) ## -------------------------------------------------------------------------- null <- writeListOfGAlignments(objs, outPath) ## list the files generated by splitBam. dir(outPath) ## ----eval=FALSE------------------------------------------------------------ # objs <- splitBam(bamfile, tags=tags, outPath=outPath, # txs=txs, genome=genome, # conservation=phastCons100way.UCSC.hg19) ## ----fig.height=4, fig.width=4--------------------------------------------- library(ChIPpeakAnno) bamfiles <- file.path(outPath, c("NucleosomeFree.bam", "mononucleosome.bam", "dinucleosome.bam", "trinucleosome.bam")) ## Plot the cumulative percentage of tag allocation in nucleosome-free ## and mononucleosome bam files. cumulativePercentage(bamfiles[1:2], as(seqinfo(Hsapiens)["chr1"], "GRanges")) ## ----fig.height=8, fig.width=4--------------------------------------------- TSS <- promoters(txs, upstream=0, downstream=1) TSS <- unique(TSS) ## estimate the library size for normalization (librarySize <- estLibSize(bamfiles)) ## calculate the signals around TSSs. NTILE <- 101 dws <- ups <- 1010 sigs <- enrichedFragments(gal=objs[c("NucleosomeFree", "mononucleosome", "dinucleosome", "trinucleosome")], TSS=TSS, librarySize=librarySize, seqlev=seqlev, TSS.filter=0.5, n.tile = NTILE, upstream = ups, downstream = dws) ## log2 transformed signals sigs.log2 <- lapply(sigs, function(.ele) log2(.ele+1)) #plot heatmap featureAlignedHeatmap(sigs.log2, reCenterPeaks(TSS, width=ups+dws), zeroAt=.5, n.tile=NTILE) ## ----fig.show="hide"------------------------------------------------------- ## get signals normalized for nucleosome-free and nucleosome-bound regions. out <- featureAlignedDistribution(sigs, reCenterPeaks(TSS, width=ups+dws), zeroAt=.5, n.tile=NTILE, type="l", ylab="Averaged coverage") ## -------------------------------------------------------------------------- ## rescale the nucleosome-free and nucleosome signals to 0~1 range01 <- function(x){(x-min(x))/(max(x)-min(x))} out <- apply(out, 2, range01) matplot(out, type="l", xaxt="n", xlab="Position (bp)", ylab="Fraction of signal") axis(1, at=seq(0, 100, by=10)+1, labels=c("-1K", seq(-800, 800, by=200), "1K"), las=2) abline(v=seq(0, 100, by=10)+1, lty=2, col="gray") ## -------------------------------------------------------------------------- ## foot prints library(MotifDb) CTCF <- query(MotifDb, c("CTCF")) CTCF <- as.list(CTCF) print(CTCF[[1]], digits=2) sigs <- factorFootprints(shiftedBamfile, pfm=CTCF[[1]], genome=genome, min.score="90%", seqlev=seqlev, upstream=100, downstream=100) ## ----fig.height=6, fig.width=6--------------------------------------------- featureAlignedHeatmap(sigs$signal, feature.gr=reCenterPeaks(sigs$bindingSites, width=200+width(sigs$bindingSites[1])), annoMcols="score", sortBy="score", n.tile=ncol(sigs$signal[[1]])) sigs$spearman.correlation sigs$Profile.segmentation ## -------------------------------------------------------------------------- vp <- vPlot(shiftedBamfile, pfm=CTCF[[1]], genome=genome, min.score="90%", seqlev=seqlev, upstream=200, downstream=200, ylim=c(30, 250), bandwidth=c(2, 1)) distanceDyad(vp, pch=20, cex=.5) ## -------------------------------------------------------------------------- path <- system.file("extdata", package="ATACseqQC", mustWork=TRUE) bamfiles <- dir(path, "*.bam$", full.name=TRUE) gals <- lapply(bamfiles, function(bamfile){ readBamFile(bamFile=bamfile, tag=character(0), which=GRanges("chr1", IRanges(1, 1e6)), asMates=FALSE) }) library(TxDb.Hsapiens.UCSC.hg19.knownGene) txs <- transcripts(TxDb.Hsapiens.UCSC.hg19.knownGene) library(GenomicAlignments) plotCorrelation(GAlignmentsList(gals), txs, seqlev="chr1") ## ----sessionInfo----------------------------------------------------------- sessionInfo()