\name{AlignedGenomeIntervals-class} \Rdversion{1.1} \docType{class} \alias{AlignedGenomeIntervals-class} \alias{AlignedGenomeIntervals} \alias{[,AlignedGenomeIntervals,ANY,ANY-method} \alias{c.AlignedGenomeIntervals} \alias{c,AlignedGenomeIntervals-method} \alias{chrlengths} \alias{chrlengths,AlignedGenomeIntervals-method} \alias{chrlengths<-} \alias{chrlengths<-,AlignedGenomeIntervals,numeric-method} \alias{clusters,AlignedGenomeIntervals-method} \alias{clusters,Genome_intervals-method} \alias{coerce,AlignedRead,AlignedGenomeIntervals-method} \alias{coerce,AlignedGenomeIntervals,RangedData-method} \alias{coverage,AlignedGenomeIntervals-method} \alias{detail,AlignedGenomeIntervals-method} \alias{export} \alias{export,AlignedGenomeIntervals,character,character-method} \alias{export,Genome_intervals,character,ANY-method} \alias{extend} \alias{extend,AlignedGenomeIntervals-method} \alias{extend,Genome_intervals_stranded-method} \alias{extend,Genome_intervals-method} \alias{id,AlignedGenomeIntervals-method} \alias{id<-} \alias{id<-,AlignedGenomeIntervals,character-method} \alias{hist,AlignedGenomeIntervals-method} \alias{interval_included,AlignedGenomeIntervals,Genome_intervals_stranded-method} \alias{interval_included,Genome_intervals_stranded,AlignedGenomeIntervals-method} \alias{interval_included,AlignedGenomeIntervals,AlignedGenomeIntervals-method} \alias{interval_overlap,AlignedGenomeIntervals,Genome_intervals-method} \alias{interval_overlap,Genome_intervals,AlignedGenomeIntervals-method} \alias{interval_overlap,AlignedGenomeIntervals,Genome_intervals_stranded-method} \alias{interval_overlap,Genome_intervals_stranded,AlignedGenomeIntervals-method} \alias{interval_overlap,AlignedGenomeIntervals,AlignedGenomeIntervals-method} \alias{matches} \alias{matches,AlignedGenomeIntervals-method} \alias{matches<-} \alias{matches<-,AlignedGenomeIntervals,integer-method} \alias{nchar,AlignedGenomeIntervals-method} \alias{organism,AlignedGenomeIntervals-method} \alias{organism<-} \alias{organism<-,AlignedGenomeIntervals,character-method} \alias{plot,AlignedGenomeIntervals-method} \alias{plot,AlignedGenomeIntervals,ANY-method} \alias{plot,AlignedGenomeIntervals,missing-method} \alias{plot,AlignedGenomeIntervals,Genome_intervals_stranded-method} \alias{reads} \alias{reads,AlignedGenomeIntervals-method} \alias{reads<-} \alias{reads<-,AlignedGenomeIntervals,character-method} \alias{reduce,AlignedGenomeIntervals-method} \alias{reduce,Genome_intervals-method} \alias{sample,AlignedGenomeIntervals-method} \alias{score,AlignedGenomeIntervals-method} \alias{score<-,AlignedGenomeIntervals,numeric-method} \alias{score<-} \alias{seq_name} \alias{seq_name,AlignedGenomeIntervals-method} \alias{seq_name,Genome_intervals-method} \alias{show,AlignedGenomeIntervals-method} \alias{sort,AlignedGenomeIntervals-method} \alias{strand,AlignedGenomeIntervals-method} \alias{strand<-,AlignedGenomeIntervals,vector-method} \alias{strand<-,AlignedGenomeIntervals,factor-method} \alias{subset,AlignedGenomeIntervals-method} \alias{width,AlignedGenomeIntervals-method} \alias{chromosome,AlignedGenomeIntervals-method} \alias{chromosome,Genome_intervals-method} \title{Class 'AlignedGenomeIntervals'} \description{ A class for representing reads from next-generation sequencing experiments that have been aligned to genomic intervals. } \section{Objects from the Class}{ Objects can be created either by: \enumerate{ \item calls of the form \code{new("AlignedGenomeIntervals", .Data, closed, ...)}. \item using the auxiliary function \code{AlignedGenomeIntervals} and supplying separate vectors of same length which hold the required information:\cr \code{AlignedGenomeIntervals(start, end, chromosome, strand, reads, matches, sequence)}\cr If arguments \code{reads} or \code{matches} are not specified, they are assumed to be '1' for all intervals. \item or, probably the most common way, by coercing from objects of class \code{AlignedRead}. } } \section{Slots}{ \describe{ \item{\code{.Data}:}{two-column integer matrix, holding the start and end coordinates of the intervals on the chromosomes} \item{\code{sequence}:}{character; sequence of the read aligned to the interval} \item{\code{reads}:}{integer; total number of reads that were aligned to this interval} \item{\code{matches}:}{integer; the total number of genomic intervals that reads which were aligned to this interval were aligned to. A value of '1' thus means that this read sequence matches uniquely to this one genome interval only} \item{\code{organism}:}{string; an identifier for the genome of which organism the intervals are related to. Functions making use of this slot require a specific annotation package \code{org..eg.db}. For example if \code{organism} is 'Hs', the annotation package 'org.Hs.eg.db' is utilised by these functions. The annotation packages can be obtained from the Bioconductor repositories.} \item{\code{annotation}:}{data.frame; see class \code{genome_intervals} for details} \item{\code{closed}:}{matrix; see class \code{genome_intervals} for details} \item{\code{type}:}{character; see class \code{genome_intervals} for details} \item{\code{score}:}{numeric; optional score for each aligned genome interval} \item{\code{id}:}{character; optional identifier for each aligned genome interval} \item{\code{chrlengths}:}{integer; optional named integer vector of chromosome lengths for the respective genome; if present it is used in place of the chromosome lengths retrieved from the annotation package (see slot \code{organism})} } } \section{Extends}{ Class \code{\link[genomeIntervals]{Genome_intervals-class}}, directly. Class \code{\link[intervals:Intervals-class]{Intervals_full}}, by class "Genome_intervals", distance 2. } \section{Methods}{ \describe{ \item{coerce}{Coercion method from objects of class \code{AlignedRead}, which is defined in package \code{ShortRead}, to objects of class \code{AlignedGenomeIntervals}} \item{coerce}{Coercion method from objects of class \code{AlignedGenomeIntervals} to objects of class \code{RangedData}, which is defined in package \code{IRanges}} \item{coverage}{\code{signature("AlignedGenomeIntervals")}: computes the read coverage over all chromosomes. If the \code{organism} of the object is set correctly, the chromosome lengths are retrieved from the appropriate annotation package, otherwise the maximum interval end is taken to be the absolute length of that chromosome (strand).\cr The result of this method is a list and the individual list elements are of class \code{Rle}, a class for encoding long repetitive vectors that is defined in package \code{IRanges}.\cr The additional argument \code{byStrand} governs whether the coverage is computed separately for each strand. If \code{byStrand=FALSE} (default) only one result is returned per chromosome. If \code{byStrand=TRUE}, there result is two separate \code{Rle} objects per chromosome with the strand appended to the chromosome name.\cr By now, the \code{coverage} method for \code{AlignedGenomeIntervals} makes use of the method for \code{RangedData} objects from package \code{IRanges} (thanks to a suggestion from P. Aboyoun). } \item{detail}{\code{signature("AlignedGenomeIntervals")}: a more detailed output of all the intervals than provided by \code{show}; only advisable for objects containing few intervals} \item{extend}{\code{signature("AlignedGenomeIntervals")} with additional arguments \code{fiveprime=0L} and \code{threeprime=0L}. These must be integer numbers and greater than or equal to 0. They specify how much is subtracted from the left border of the interval and added to the right side. Which end is 5' and which one is 3' are determined from the strand information of the object. Lastly, if the object has an \code{organism} annotation, it is checked that the right ends of the intervals do not exceed the respective chromosome lengths.} \item{export}{export the aligned intervals as tab-delimited text files which can be uploaded to the UCSC genome browser as \sQuote{custom tracks}. Currently, there are methods for exporting the data into \sQuote{bed} format and \sQuote{bedGraph} format, either writing the intervals from both strands into one file or into two separate files (formats \sQuote{bedStrand} and \sQuote{bedGraphStrand}, respectively). Details about these track formats can be found at the UCSC genome browser web pages.\cr The additional argument \code{writeHeader} can be set to \code{FALSE} to suppress writing of the track definition header line to the file.\cr For \code{Genome_intervals} objects, only \sQuote{bed} format is supported at the moment and does not need to be specified. } \item{hist}{\code{signature("AlignedGenomeIntervals")}: creates a histogram of the lengths of the reads aligned to the intervals} \item{organism}{Get or set the organism that the genome intervals in the object correspond to. Should be a predefined code, such as 'Mm' for mouse and 'Hs' for human. The reason for this code, that, if the organism is set, a corresponding annotation package that is called \code{org..eg.db} is used, for example for obtaining the chromosome lengths to be used in methods such as \code{coverage}. These annotation packages can be obtained from the Bioconductor repository. } \item{plot}{visualisation method; a second argument of class \code{Genome_intervals_stranded} can be provided for additional annotation to the plot. Please see below and in the vignette for examples. Refer to the documentation of \code{\link{plotAligned}} for more details on the plotting function.} \item{reduce}{collapse/reduce aligned genome intervals by combining intervals which are completely included in each other, combining overlapping intervals AND combining immediately adjacent intervals (if \code{method="standard"}). Intervals are only combined if they are on the same chromosome, the same strand AND have the same match specificity of the aligned reads. \cr If you only want to combine intervals that have exactly the same start and stop position (but may have reads of slightly different sequence aligned to them), then use the argument \code{method="exact"}. \cr If you only want to combine intervals that have exactly the same 5' or 3' end (but may differ in the other end and in the aligned sequence), then use the argument \code{method="same5"} (same 5' end) or \code{method="same3"} (same 3' end). \cr Finally, it's possible to only collapse/reduce aligned genome intervals that overlap each other by at least a certain fraction using the argument \code{min.frac}. \code{min.frac} is a number between 0.0 and 1.0. For example, if you call \code{reduce} with argument \code{min.frac=0.4}, only intervals that overlap each other by at least 40 percent are collapsed/merged. } \item{sample}{draw a random sample of \code{n} (Argument \code{size}) of the aligned reads (without or with replacement) and returns the \code{AlignedGenomeIntervals} object defined by these aligned reads.} \item{score}{access or set a custom score for the object} \item{sort}{sorts the intervals by chromosome name, start and end coordinate in increasing order (unless \code{decreasing=TRUE} is specified) and returns the sorted object} \item{subset}{take a subset of reads, matrix-like subsetting via '\[' can also be used} } } \author{Joern Toedling} \seealso{ \code{\link[genomeIntervals]{Genome_intervals-class}}, \code{\link[ShortRead]{AlignedRead-class}}, \code{\link[IRanges]{RangedData-class}}, \code{\link[IRanges]{RangedData-class}}, \code{\link{plotAligned}} } \examples{ ############# toy example: A <- new("AlignedGenomeIntervals", .Data=cbind(c(1,3,4,5,8,10), c(5,5,6,8,9,11)), annotation=data.frame( seq_name=factor(rep(c("chr1","chr2","chr3"), each=2)), strand=factor(c("-","-","+","+","+","+") ,levels=c("-","+")), inter_base=rep(FALSE, 6)), reads=rep(3L, 6), matches=rep(1L,6), sequence=c("ACATT","ACA","CGT","GTAA","AG","CT")) show(A) detail(A) ## alternative initiation of this object: A <- AlignedGenomeIntervals( start=c(1,3,4,5,8,10), end=c(5,5,6,8,9,11), chromosome=rep(c("chr2","chrX","chr1"), each=2), strand=c("-","-","+","+","+","+"), sequence=c("ACATT","ACA","CGT","GGAA","AG","CT"), reads=c(1L, 5L, 2L, 7L, 3L, 3L)) detail(A) ## custom identifiers can be assigned to the intervals id(A) <- paste("gi", 1:6, sep="") ## subsetting and combining detail(A[c(1:4)]) detail(c(A[1], A[4])) ## sorting: always useful A <- sort(A) detail(A) ## the 'reduce' method provides a cleaned-up, compact set detail(reduce(A)) ## with arguments specifying additional conditions for merging detail(reduce(A, min.frac=0.8)) ## 'sample' to draw a sample subset of reads and their intervals detail(sample(A, 10)) ## biological example exDir <- system.file("extdata", package="girafe") exA <- readAligned(dirPath=exDir, type="Bowtie", pattern="aravinSRNA_23_no_adapter_excerpt_mm9_unmasked.bwtmap") exAI <- as(exA, "AlignedGenomeIntervals") organism(exAI) <- "Mm" show(exAI) ## which chromosomes are the intervals on? table(chromosome(exAI)) ## subset exAI[is.element(chromosome(exAI), c("chr1","chr2"))] ## compute coverage per chromosome: coverage(exAI[is.element(chromosome(exAI), c("chr1","chr2"))]) ### plotting: load(file.path(exDir, "mgi_gi.RData")) plot(exAI, mgi.gi, chr="chrX", start=50400000, end=50410000) ### overlap with annotated genome elements: exOv <- interval_overlap(exAI, mgi.gi) ## how many elements do read match positions generally overlap: table(listLen(exOv)) ## what are the 13 elements overlapped by a single match position: mgi.gi[exOv[[which.max(listLen(exOv))]]] ## what kinds of elements are overlapped (tabOv <- table(as.character(mgi.gi$type)[unlist(exOv)])) ### display those classes: my.cols <- rainbow(length(tabOv)) pie(tabOv, col=my.cols, radius=0.85) } \keyword{classes}