## ----style, eval=TRUE, echo=FALSE, results="asis"--------------------------------------- BiocStyle::latex() ## ----preliminaries, echo=FALSE, message=FALSE------------------------------------------- library(GenomeInfoDb) library(TxDb.Dmelanogaster.UCSC.dm3.ensGene) ## ----genomeStyles1---------------------------------------------------------------------- seqmap <- genomeStyles() head(seqmap,n=2) ## ----name------------------------------------------------------------------------------- names(genomeStyles()) ## ----genomeStyles2---------------------------------------------------------------------- head(genomeStyles("Homo_sapiens"),5) ## ----style-present---------------------------------------------------------------------- "UCSC" %in% names(genomeStyles("Homo_sapiens")) ## ----extractSeqlevels------------------------------------------------------------------- extractSeqlevels(species="Arabidopsis_thaliana", style="NCBI") ## ----extractSeqlevelsgroup-------------------------------------------------------------- extractSeqlevelsByGroup(species="Arabidopsis_thaliana", style="NCBI", group="auto") ## ----seqlevelsStyle--------------------------------------------------------------------- seqlevelsStyle(paste0("chr",c(1:30))) seqlevelsStyle(c("2L","2R","X","Xhet")) ## ----keepChr-txdb----------------------------------------------------------------------- newchr <- paste0("chr",c(1:22,"X","Y","M","1_gl000192_random","4_ctg9_hap1")) seqlevelsInGroup(newchr, group="sex") seqlevelsInGroup(newchr, group="auto") seqlevelsInGroup(newchr, group="circular") seqlevelsInGroup(newchr, group="sex","Homo_sapiens","UCSC") ## ----check2----------------------------------------------------------------------------- seqnames <- c("chr1", "chr9", "chr2", "chr3", "chr10") all(seqnames %in% extractSeqlevels("Homo_sapiens", "UCSC")) ## ----orderSeqlevels--------------------------------------------------------------------- seqnames <- c("chr1","chr9", "chr2", "chr3", "chr10") orderSeqlevels(seqnames) seqnames[orderSeqlevels(seqnames)] ## ----rankSeqlevels---------------------------------------------------------------------- seqnames <- c("chr1","chr9", "chr2", "chr3", "chr10") rankSeqlevels(seqnames) ## ----find------------------------------------------------------------------------------- mapSeqlevels(c("chrII", "chrIII", "chrM"), "NCBI") ## ----basic-gr--------------------------------------------------------------------------- gr <- GRanges(paste0("ch",1:35), IRanges(1:35, width=5)) gr ## ----renameseqlevels-------------------------------------------------------------------- newnames <- paste0("chr",1:35) names(newnames) <- paste0("ch",1:35) head(newnames) gr <- renameSeqlevels(gr,newnames) gr ## ----dropseqlevels---------------------------------------------------------------------- dropSeqlevels(gr, paste0("chr",23:35), pruning.mode="coarse") ## ----keepseqlevels---------------------------------------------------------------------- keepSeqlevels(gr, paste0("chr",1:22), pruning.mode="coarse") ## ----keepstdchr------------------------------------------------------------------------- keepStandardChromosomes(gr, pruning.mode="coarse") ## ----keepstdchr-2----------------------------------------------------------------------- plantgr <- GRanges(c(1:5,"MT","Pltd"), IRanges(1:7,width=5)) keepStandardChromosomes(plantgr, species="Arabidopsis thaliana", pruning.mode="coarse") ## ----genome-description-class, message=FALSE-------------------------------------------- library(BSgenome.Celegans.UCSC.ce2) class(Celegans) is(Celegans, "GenomeDescription") provider(Celegans) seqinfo(Celegans) gendesc <- as(Celegans, "GenomeDescription") class(gendesc) gendesc provider(gendesc) seqinfo(gendesc) bsgenomeName(gendesc) ## ----Seqinfo-egs------------------------------------------------------------------------ ## Note that all the arguments (except 'genome') must have the ## same length. 'genome' can be of length 1, whatever the lengths ## of the other arguments are. x <- Seqinfo(seqnames=c("chr1", "chr2", "chr3", "chrM"), seqlengths=c(100, 200, NA, 15), isCircular=c(NA, FALSE, FALSE, TRUE), genome="toy") length(x) seqnames(x) names(x) seqlevels(x) seqlengths(x) isCircular(x) genome(x) x[c("chrY", "chr3", "chr1")] # subset by names ## Rename, drop, add and/or reorder the sequence levels: xx <- x seqlevels(xx) <- sub("chr", "ch", seqlevels(xx)) # rename xx seqlevels(xx) <- rev(seqlevels(xx)) # reorder xx seqlevels(xx) <- c("ch1", "ch2", "chY") # drop/add/reorder xx seqlevels(xx) <- c(chY="Y", ch1="1", "22") # rename/reorder/drop/add xx y <- Seqinfo(seqnames=c("chr3", "chr4", "chrM"), seqlengths=c(300, NA, 15)) y merge(x, y) # rows for chr3 and chrM are merged suppressWarnings(merge(x, y)) ## Note that, strictly speaking, merging 2 Seqinfo objects is not ## a commutative operation, i.e., in general 'z1 <- merge(x, y)' ## is not identical to 'z2 <- merge(y, x)'. However 'z1' and 'z2' ## are guaranteed to contain the same information (i.e. the same ## rows, but typically not in the same order): suppressWarnings(merge(y, x)) ## This contradicts what 'x' says about circularity of chr3 and chrM: isCircular(y)[c("chr3", "chrM")] <- c(TRUE, FALSE) y if (interactive()) { merge(x, y) # raises an error } ## ----quick-style------------------------------------------------------------------------ txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene seqlevels(txdb) genomeStyles("Drosophila melanogaster") mapSeqlevels(seqlevels(txdb), "NCBI") ## ----sequence, eval=FALSE--------------------------------------------------------------- # sequence <- seqlevels(x) # # ## sequence is in UCSC format and we want NCBI style # newStyle <- mapSeqlevels(sequence,"NCBI") # newStyle <- newStyle[complete.cases(newStyle)] # removing NA cases. # # ## rename the seqlevels # x <- renameSeqlevels(x,newStyle) # # ## keep only the seqlevels you want (say autosomes) # auto <- extractSeqlevelsByGroup(species="Homo sapiens", style="NCBI", # group="auto") # x <- keepSeqlevels(x,auto) ## ----sessionInfo, results='asis', eval=TRUE--------------------------------------------- toLatex(sessionInfo())