## ----style, echo = FALSE, results = 'asis'--------------------------------- knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE")), warning=FALSE) ## ----setup, echo=FALSE----------------------------------------------------- suppressPackageStartupMessages({ library(SummarizedExperiment) library(GenomicAlignments) library(AnnotationHub) library(biomaRt) library(airway) library(org.Hs.eg.db) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(RNAseqData.HNRNPC.bam.chr14) library(Organism.dplyr) }) ## ----ranges, message=FALSE------------------------------------------------- library(GenomicRanges) gr <- GRanges("A", IRanges(c(10, 20, 22), width=5), "+") shift(gr, 1) # intra-range range(gr) # inter-range reduce(gr) # inter-range snps <- GRanges("A", IRanges(c(11, 17, 24), width=1)) findOverlaps(snps, gr) # between-range ## ----SummarizedExperiment-------------------------------------------------- library(SummarizedExperiment) library(airway) data(airway) airway colData(airway) airway[, airway$dex %in% "trt"] chr14 <- as(seqinfo(airway), "GRanges")["14"] airway[airway %over% chr14,] ## -------------------------------------------------------------------------- library(org.Hs.eg.db) org <- org.Hs.eg.db select(org, "BRCA1", c("ENSEMBL", "GENENAME"), "SYMBOL") ## -------------------------------------------------------------------------- library(TxDb.Hsapiens.UCSC.hg19.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene promoters(txdb) ## -------------------------------------------------------------------------- library(Organism.dplyr) src = src_organism("TxDb.Hsapiens.UCSC.hg19.knownGene") src select(src, "BRCA1", c("ensembl", "genename"),"symbol") promoters(src) tbl(src, "id") ## -------------------------------------------------------------------------- library(AnnotationHub) hub = AnnotationHub() hub query(hub, c("ensembl", "81.gtf")) hub[["AH48004"]] ## -------------------------------------------------------------------------- library(RNAseqData.HNRNPC.bam.chr14) fls = RNAseqData.HNRNPC.bam.chr14_BAMFILES basename(fls) ## -------------------------------------------------------------------------- library(GenomicAlignments) bfls = BamFileList(fls) bfl = bfls[[1]] ## -------------------------------------------------------------------------- ga = readGAlignments(bfl) ga table(strand(ga)) ## -------------------------------------------------------------------------- tail(sort(table(cigar(ga)))) ga[cigar(ga) != "72M"] ## -------------------------------------------------------------------------- summarizeJunctions(ga) junctions <- summarizeJunctions(ga, with.revmap=TRUE) ga[ junctions$revmap[[1]] ] ## -------------------------------------------------------------------------- coverage(bfl)$chr14 ## -------------------------------------------------------------------------- library(airway) data(airway) airway ## -------------------------------------------------------------------------- x <- assay(airway) class(x) dim(x) head(x) colData(airway) rowRanges(airway) ## -------------------------------------------------------------------------- cidx <- colData(airway)$dex %in% "trt" airway[, cidx] ## shortcut airway[, airway$dex %in% "trt"] ## -------------------------------------------------------------------------- chr14 <- as(seqinfo(rowRanges(airway)), "GRanges")["14"] ridx <- rowRanges(airway) %over% chr14 airway[ridx,] ## shortcut chr14 <- as(seqinfo(airway), "GRanges")["14"] airway[airway %over% chr14,] ## -------------------------------------------------------------------------- library(org.Hs.eg.db) ## -------------------------------------------------------------------------- select(org.Hs.eg.db, "HNRNPC", c("ENTREZID", "GENENAME"), "SYMBOL") sym2eg <- mapIds(org.Hs.eg.db, "HNRNPC", "ENTREZID", "SYMBOL") ## -------------------------------------------------------------------------- library(TxDb.Hsapiens.UCSC.hg19.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene ## -------------------------------------------------------------------------- gns <- genes(txdb) exonsBy(txdb, "gene")[sym2eg] ## -------------------------------------------------------------------------- library(Rsamtools) param <- ScanBamParam(which=gns[sym2eg]) readGAlignments(bfl, param=param) ## -------------------------------------------------------------------------- library(Organism.dplyr) src = src_organism("TxDb.Hsapiens.UCSC.hg19.knownGene") src ## -------------------------------------------------------------------------- tbl(src, "id") tbl(src, "id") %>% dplyr::select(symbol, ensembl, alias) %>% filter( symbol == "BRCA1") tbl(src, "id") %>% dplyr::select(symbol, ensembl, entrez) %>% filter(symbol %in% c("BRCA1", "BRCA2")) %>% collect(n=Inf) %>% unique ## -------------------------------------------------------------------------- src tbl(src, "ranges_tx") tbl(src, "id") %>% filter(symbol == "BRCA1") %>% dplyr::select(entrez, ensembl) %>% collect(n=Inf) %>% unique ranges = tbl(src, "ranges_tx") %>% filter(entrez == "672") ranges ranges %>% collect(n=Inf) %>% GRanges ## -------------------------------------------------------------------------- library(AnnotationHub) hub <- AnnotationHub() query(hub, c("epigenome", "metadata")) meta <- hub[["AH41830"]] ## -------------------------------------------------------------------------- table(meta$ANATOMY) meta[meta$ANATOMY == "LIVER",] ## -------------------------------------------------------------------------- query(hub, c("E118", "mnemonic")) E118 <- hub[["AH46971"]] E118 ## -------------------------------------------------------------------------- table(E118$name) E118[E118$name %in% "Heterochromatin"] ## ----biomart, eval=FALSE--------------------------------------------------- # library(biomaRt) # head(listMarts(), 3) ## list marts # head(listDatasets(useMart("ensembl")), 3) ## mart datasets # ensembl <- ## fully specified mart # useMart("ensembl", dataset = "hsapiens_gene_ensembl") # # head(listFilters(ensembl), 3) ## filters # myFilter <- "chromosome_name" # substr(filterOptions(myFilter, ensembl), 1, 50) ## return values # myValues <- c("21", "22") # head(listAttributes(ensembl), 3) ## attributes # myAttributes <- c("ensembl_gene_id","chromosome_name") # # ## assemble and query the mart # res <- getBM(attributes = myAttributes, filters = myFilter, # values = myValues, mart = ensembl)