## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"------------------ BiocStyle::latex() ## ----include=FALSE------------------------------------------------------- library(knitr) library(regioneR) opts_chunk$set(concordance=FALSE) set.seed(21666641) ## ------------------------------------------------------------------------ A <- createRandomRegions(nregions=50, length.mean=5000000, length.sd=3000000) B <- c(A[1:25], createRandomRegions(nregions=25, length.mean=500000, length.sd=30000)) numOverlaps(A, B, count.once=TRUE) numOverlaps(randomizeRegions(A), B, count.once=TRUE) ## ------------------------------------------------------------------------ pt <- overlapPermTest(A=A, B=B, ntimes=50) pt ## ----fig.height=6,fig.width=8-------------------------------------------- plot(pt) ## ----fig.height=6,fig.width=8-------------------------------------------- lz <- localZScore(pt=pt, A=A, B=B) plot(lz) ## ------------------------------------------------------------------------ # special <- toGRanges("http://gattaca.imppc.org/regioner/data/my.special.genes.bed") special <- toGRanges(system.file("extdata", "my.special.genes.bed", package="regioneR")) # all.genes <- toGRanges("http://gattaca.imppc.org/regioner/data/all.genes.bed") all.genes <- toGRanges(system.file("extdata", "all.genes.bed", package="regioneR")) #altered <- toGRanges("http://gattaca.imppc.org/regioner/data/my.altered.regions.bed") altered <- toGRanges(system.file("extdata", "my.altered.regions.bed", package="regioneR")) length(special) length(all.genes) length(altered) ## ------------------------------------------------------------------------ numOverlaps(special, altered) ## ------------------------------------------------------------------------ random.RS <- resampleRegions(special, universe=all.genes) random.RS ## ------------------------------------------------------------------------ random.RS <- resampleRegions(special, universe=all.genes) numOverlaps(random.RS, altered) random.RS <- resampleRegions(special, universe=all.genes) numOverlaps(random.RS, altered) random.RS <- resampleRegions(special, universe=all.genes) numOverlaps(random.RS, altered) random.RS <- resampleRegions(special, universe=all.genes) numOverlaps(random.RS, altered) ## ----eval=FALSE---------------------------------------------------------- ## # NOT RUN ## pt <- permTest(A=my.regions, B=repeats, randomize.function=randomizeRegions, ## evaluate.function=overlapRegions) ## ----eval=FALSE---------------------------------------------------------- ## # NOT RUN ## pt <- permTest(A=my.genes, randomize.function=resampleRegions, universe=all.genes, ## evaluate.function=meanInRegions, x=methylation.levels.450K) ## ------------------------------------------------------------------------ pt <- permTest(A=special, ntimes=50, randomize.function=resampleRegions, universe=all.genes, evaluate.function=numOverlaps, B=altered, verbose=FALSE) ## ------------------------------------------------------------------------ pt summary(pt) ## ----fig.height=6,fig.width=8-------------------------------------------- plot(pt) ## ----fig.height=6,fig.width=8-------------------------------------------- regular <- toGRanges("http://gattaca.imppc.org/regioner/data/my.regular.genes.bed") length(regular) numOverlaps(regular, altered) pt.reg <- permTest(A=regular, ntimes=50, randomize.function=resampleRegions, universe=all.genes, evaluate.function=numOverlaps, B=altered, verbose=FALSE) pt.reg plot(pt.reg) ## ----eval=FALSE---------------------------------------------------------- ## #NOT RUN - See Figure 1 ## pt.5000 <- permTest(A=special, ntimes=5000, randomize.function=resampleRegions, ## universe=all.genes, evaluate.function=numOverlaps, B=altered, verbose=TRUE) ## plot(pt.5000) ## ----eval=FALSE---------------------------------------------------------- ## #NOT RUN - See Figure 2 ## pt.5000.reg <- permTest(A=regular, ntimes=5000, randomize.function=resampleRegions, ## universe=all.genes, evaluate.function=numOverlaps, B=altered, verbose=TRUE) ## plot(pt.5000.reg) ## ------------------------------------------------------------------------ A <- toGRanges(data.frame(chr=c("chr1", "chr1", "chr1"), start=c(20000, 50000, 100000), end=c(22000, 70000, 400000))) ## ------------------------------------------------------------------------ randomizeRegions(A, genome="hg19") ## ------------------------------------------------------------------------ randomizeRegions(A, genome="hg19", per.chromosome=TRUE) ## ------------------------------------------------------------------------ circularRandomizeRegions(A, genome="hg19", mask=NA) ## ------------------------------------------------------------------------ gcContent <- function(A, bsgenome, ...) { A <- toGRanges(A) reg.seqs <- getSeq(bsgenome, A) base.frequency <- alphabetFrequency(reg.seqs) gc.pct <- (sum(base.frequency[,"C"]) + sum(base.frequency[,"G"]))/sum(width(A)) return(gc.pct) } ## ------------------------------------------------------------------------ permuteRegionsMetadata <- function(A, ...) { A <- toGRanges(A) mcols(A)[,1] <- mcols(A)[sample(length(A)),1] return(A) } ## ------------------------------------------------------------------------ A <- createRandomRegions(nregions=50, length.mean=5000000, length.sd=3000000) B <- c(A[1:25], createRandomRegions(nregions=25, length.mean=500000, length.sd=30000)) #Without mc.set.seed=FALSE set.seed(123) overlapPermTest(A, B, ntimes=10, force.parallel=TRUE) set.seed(123) overlapPermTest(A, B, ntimes=10, force.parallel=TRUE) #With mc.set.seed=FALSE set.seed(123) overlapPermTest(A, B, ntimes=10, mc.set.seed=FALSE, force.parallel=TRUE) set.seed(123) overlapPermTest(A, B, ntimes=10, mc.set.seed=FALSE, force.parallel=TRUE) ## ----fig.height=6,fig.width=8-------------------------------------------- pt <- permTest(A=special, ntimes=50, randomize.function=resampleRegions, universe=all.genes, evaluate.function=numOverlaps, B=altered, verbose=FALSE) lz <- localZScore(A=special, pt=pt, window=10*mean(width(special)), step=mean(width(special))/2, B=altered) plot(lz) ## ----fig.height=6,fig.width=8-------------------------------------------- genome <- filterChromosomes(getGenome("hg19"), keep.chr="chr1") B <- createRandomRegions(nregions=100, length.mean=10000, length.sd=20000, genome=genome, non.overlapping=FALSE) A <- B[sample(20)] pt <- overlapPermTest(A=A, B=B, ntimes=100, genome=genome, non.overlapping=FALSE) pt lz <- localZScore(A=A, B=B, pt=pt, window=10*mean(width(A)), step=mean(width(A))/2 ) plot(lz) ## ------------------------------------------------------------------------ A <- data.frame(chr=1, start=c(1,15,24), end=c(10,20,30), x=c(1,2,3), y=c("a","b","c")) B <- toGRanges(A) B ## ------------------------------------------------------------------------ toDataframe(B) ## ------------------------------------------------------------------------ human.genome <- getGenomeAndMask("hg19", mask=NA)$genome human.canonical <- filterChromosomes(human.genome, organism="hg") listChrTypes() human.autosomal <- filterChromosomes(human.genome, organism="hg", chr.type="autosomal") human.123 <- filterChromosomes(human.genome, keep.chr=c("chr1", "chr2", "chr3")) ## ------------------------------------------------------------------------ overlaps.df <- overlapRegions(A, B) overlaps.df ## ------------------------------------------------------------------------ overlaps.df <- overlapRegions(A, B, type="BinA", get.pctA=TRUE) overlaps.df ## ------------------------------------------------------------------------ overlaps.df <- overlapRegions(A, B, min.bases=5) overlaps.df ## ------------------------------------------------------------------------ overlaps.bool <- overlapRegions(A, B, min.bases=5, only.boolean=TRUE) overlaps.bool ## ------------------------------------------------------------------------ overlaps.int <- overlapRegions(A, B, min.bases=5, only.count=TRUE) overlaps.int ## ----eval=FALSE---------------------------------------------------------- ## #NOT RUN ## set.seed(12345) ## cpgHMM <- toGRanges("http://rafalab.jhsph.edu/CGI/model-based-cpg-islands-hg19.txt") ## promoters <- toGRanges("http://gattaca.imppc.org/regioner/data/UCSC.promoters.hg19.bed") ## ----eval=FALSE---------------------------------------------------------- ## #NOT RUN ## cpgHMM <- filterChromosomes(cpgHMM, organism="hg", chr.type="canonical") ## promoters <- filterChromosomes(promoters, organism="hg", chr.type="canonical") ## ----eval=FALSE---------------------------------------------------------- ## #NOT RUN ## cpgHMM_2K <- sample(cpgHMM, 2000) ## ## pt <- overlapPermTest(cpgHMM_2K, promoters, ntimes=1000, genome="hg19", count.once=TRUE) ## pt ## P-value: 0.000999000999000999 ## Z-score: 60.5237 ## Number of iterations: 1000 ## Alternative: greater ## Evaluation of the original region set: 614 ## Evaluation function: numOverlaps ## Randomization function: randomizeRegions ## mean(pt$permuted) ## 79.087 ## ----eval=FALSE---------------------------------------------------------- ## #NOT RUN ## plot(pt) ## ----eval=FALSE---------------------------------------------------------- ## #NOT RUN ## set.seed(12345) ## download.file("http://hgdownload-test.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeAwgTfbsUniform/wgEncodeAwgTfbsSydhHepg2Rad21IggrabUniPk.narrowPeak.gz", "Rad21.gz") ## ## download.file("http://hgdownload-test.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeAwgTfbsUniform/wgEncodeAwgTfbsUwHepg2CtcfUniPk.narrowPeak.gz", "Ctcf.gz") ## ## HepG2_Rad21 <- toGRanges(gzfile("Rad21.gz"), header=FALSE) ## HepG2_Ctcf <- toGRanges(gzfile("Ctcf.gz"), header=FALSE) ## ## promoters <- toGRanges("http://gattaca.imppc.org/regioner/data/UCSC.promoters.hg19.bed") ## ----eval=FALSE---------------------------------------------------------- ## #NOT RUN ## promoters <- filterChromosomes(promoters, organism="hg19") ## HepG2_Rad21_5K <- sample(HepG2_Rad21, 5000) ## ----eval=FALSE---------------------------------------------------------- ## #NOT RUN ## pt_Rad21_5k_vs_Ctcf <- permTest(A=HepG2_Rad21_5K, B=HepG2_Ctcf, ntimes=1000, ## randomize.function=circularRandomizeRegions, ## evaluate.function=numOverlaps, count.once=TRUE, ## genome="hg19", mc.set.seed=FALSE, mc.cores=4) ## ## pt_Rad21_5k_vs_Prom <- permTest(A=HepG2_Rad21_5K, B=promoters, ntimes=1000, ## randomize.function=circularRandomizeRegions, ## evaluate.function=numOverlaps, count.once=TRUE, ## genome="hg19", mc.set.seed=FALSE, mc.cores=4) ## ## pt_Rad21_5k_vs_Ctcf ## ## pt_Rad21_5k_vs_Prom ## ## plot(pt_Rad21_5k_vs_Ctcf, main="Rad21_5K vs CTCF") ## plot(pt_Rad21_5k_vs_Prom, main="Rad21_5K vs Promoters") ## ----eval=FALSE---------------------------------------------------------- ## #NOT RUN ## lz_Rad21_vs_Ctcf_1 <- localZScore(A=HepG2_Rad21_5K, B=HepG2_Ctcf, ## pt=pt_Rad21_5k_vs_Ctcf, ## window=1000, step=50, count.once=TRUE) ## ## lz_Rad21_vs_Prom_1 <- localZScore(A=HepG2_Rad21_5K, B=promoters, ## pt=pt_Rad21_5k_vs_Prom, ## window=1000, step=50, count.once=TRUE) ## ## plot(lz_Rad21_vs_Ctcf_1, main="Rad21_5k vs CTCF (1Kbp)") ## plot(lz_Rad21_vs_Prom_1, main="Rad21_5k vs promoters (1Kbp)") ## ## ----eval=FALSE---------------------------------------------------------- ## #NOT RUN ## lz_Rad21_vs_Ctcf_2 <- localZScore(A=HepG2_Rad21_5K, B=HepG2_Ctcf, ## pt=pt_Rad21_5k_vs_Ctcf, ## window=10000, step=500, count.once=TRUE) ## ## lz_Rad21_vs_Prom_2 <- localZScore(A=HepG2_Rad21_5K, B=promoters, ## pt=pt_Rad21_5k_vs_Prom, ## window=10000, step=500, count.once=TRUE) ## ## plot(lz_Rad21_vs_Ctcf_2, main="Rad21_5k vs CTCF (10Kbp)") ## plot(lz_Rad21_vs_Prom_2, main="Rad21_5k vs promoters (10Kbp)") ## ----sessionInfo--------------------------------------------------------- sessionInfo()