%\VignetteIndexEntry{High-level S4 containers for HTS data (slides)} %\VignetteDepends{IRanges, GenomicRanges, Biostrings, Rsamtools, GenomicFeatures, TxDb.Dmelanogaster.UCSC.dm3.ensGene, pasillaBamSubset} \SweaveOpts{keep.source=TRUE, eps=FALSE, width=9, height=3} \documentclass[8pt]{beamer} \usepackage{BioconductorSlides} \renewcommand\Rclass[1]{{\texttt{#1}\index{#1 (class)}}} % A Beamer Quickstart: % http://www.math.umbc.edu/~rouben/beamer/ \setbeamertemplate{background canvas}[vertical shading][bottom=red!20,top=yellow!30] \setbeamercolor{block body example}{bg=white} \definecolor{YESgreen}{RGB}{0, 160, 80} \newcommand\YES{\textcolor{YESgreen}{\textbf{YES}}} \definecolor{NOgray}{RGB}{160, 160, 160} \newcommand\NO{\textcolor{NOgray}{\textbf{NO}}} \AtBeginSection[] { \begin{frame}{} \tableofcontents[currentsection,currentsubsection] \end{frame} } \AtBeginSubsection[] { \begin{frame}{} \tableofcontents[currentsection,currentsubsection] \end{frame} } \title{High-level S4 containers for HTS data} \author{Herv\'e Pag\`es} \institute[FHCRC]{Fred Hutchinson Cancer Research Center} \date{24-25 July 2012} \begin{document} <>= options(width=84) plotRanges <- function(x, xlim = x, main = deparse(substitute(x)), col = "black", sep = 0.5, ...) { height <- 1 if (is(xlim, "Ranges")) xlim <- c(min(start(xlim)), max(end(xlim))) bins <- disjointBins(IRanges(start(x), end(x) + 1)) plot.new() par(mai=c(0.5, 0.2, 1.2, 0.2)) plot.window(xlim, c(0, max(bins)*(height + sep))) ybottom <- bins * (sep + height) - height rect(start(x)-0.5, ybottom, end(x)+0.5, ybottom + height, col = col, ...) title(main, cex.main=2.8, font.main=1) axis(1) } @ \maketitle \frame{\tableofcontents} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Introduction} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile] \frametitle{High-level vs low-level} %\begin{block}{} Small nb of high-level containers for HTS data (< 10). Built on top of 100+ low-level containers! %\end{block} \setbeamercolor{block body}{bg=red!20} \begin{block}{} \centerline{\bf High-level containers for HTS data} \medskip \begin{columns}[t] \begin{column}{0.42\textwidth} %\begin{block}{} Covered in this presentation: \begin{itemize} \item \Rclass{GRanges} \item \Rclass{GRangesList} \item \Rclass{GappedAlignments} \item \Rclass{GappedAlignmentPairs} \end{itemize} {\small Defined in the \Rpackage{GenomicRanges} package.} %\end{block} \end{column} \begin{column}{0.5\textwidth} %\begin{block}{} Not covered in this presentation: \begin{itemize} \item Defined in the \Rpackage{GenomicRanges} package: \Rclass{SummarizedExperiment} \item Defined in the \Rpackage{ShortRead} package: \Rclass{ShortRead}, \Rclass{AlignedRead} \end{itemize} %\end{block} \end{column} \end{columns} \end{block} \setbeamercolor{block body}{bg=yellow!20} \begin{block}{} \centerline{\bf Low-level containers (most frequently seen, only)} \medskip \begin{columns}[t] \begin{column}{0.42\textwidth} %\begin{block}{} Covered in this presentation: \begin{itemize} \item \Rclass{Rle} \item \Rclass{IRanges} \item \Rclass{DataFrame} \item \Rclass{CharacterList}, \Rclass{IntegerList} \item \Rclass{RleList}, \Rclass{RleViews}, \Rclass{RleViewsList} \end{itemize} {\small Defined in the \Rpackage{IRanges} package.} %\end{block} \end{column} \begin{column}{0.5\textwidth} %\begin{block}{} Not covered in this presentation: \begin{itemize} \item In the \Rpackage{IRanges} package: \Rclass{IRangesList}, \Rclass{SplitDataFrameList}, \Rclass{RangedData}, and many, many more... \item In the \Rpackage{Biostrings} package: \Rclass{DNAString}, \Rclass{DNAStringSet} \end{itemize} %\end{block} \end{column} \end{columns} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{About the implementation} \begin{block}{} S4 classes (aka {\em formal} classes) --> relies heavily on the \Rpackage{methods} package. \end{block} \begin{block}{} Current implementation tries to provide an API that is as consistent as possible. In particular: \begin{itemize} \item The end-user should never need to use \Rcode{new()}: a {\em constructor}, named as the container, is provided for each container. E.g. \Rcode{GRanges()}. \item The end-user should never need to use \Rcode{@} (aka {\em direct slot access}): slot {\em accessors} ({\em getters} and {\em setters}) are provided for each container. Not all getters have a corresponding setter! \item Standard functions/operators like \Rcode{length()}, \Rcode{names()}, \Rcode{[}, \Rcode{c()}, \Rcode{[[}, \Rcode{\$}, etc... work almost everywhere and behave ``as expected''. \item Additional functions that work almost everywhere: \Rcode{elementMetadata()}, \Rcode{elementLengths()}, \Rcode{seqinfo()}, etc... \item Consistent display (\Rcode{show} methods). \end{itemize} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{Basic operations} \begin{block}{} {\em Vector operations}: \begin{itemize} \item Single-bracket subsetting: \Rcode{[} \item Combining: \Rcode{c()} \item Comparing: \Rcode{==}, \Rcode{!=}, \Rcode{duplicated()}, \Rcode{unique()} \item Ordering: \Rcode{<=}, \Rcode{>=}, \Rcode{<}, \Rcode{>}, \Rcode{order()}, \Rcode{sort()}, \Rcode{rank()} \end{itemize} \end{block} \begin{block}{} {\em List operations}: \begin{itemize} \item Double-bracket subsetting: \Rcode{[[} \item \Rcode{elementLengths()}, \Rcode{unlist()}, \Rcode{relist()} \item \Rcode{lapply()}, \Rcode{sapply()}, \Rcode{endoapply()} \item \Rcode{mendoapply()} (not covered in this presentation) \end{itemize} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{Basic operations (continued)} \begin{block}{} {\em Ranges operations}: \begin{itemize} \item \Rcode{shift()}, \Rcode{narrow()}, \Rcode{resize()}, \Rcode{flank()} \item \Rcode{disjoin()} \item \Rcode{range()}, \Rcode{reduce()}, \Rcode{gaps()} \item \Rcode{union()}, \Rcode{intersect()}, \Rcode{setdiff()} \item \Rcode{punion()}, \Rcode{pintersect()}, \Rcode{psetdiff()}, \Rcode{pgap()} \end{itemize} \end{block} \begin{block}{} {\em Coercion methods}: \Rcode{as()} and all the {\em S3 forms} (\Rcode{as.vector()}, \Rcode{as.character()}, \Rcode{as.factor()}, etc...) \end{block} \begin{block}{} {\em Splitting}: \Rcode{split()} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{Advanced operations} \begin{block}{} {\em Coverage and slicing}: \begin{itemize} \item \Rcode{coverage()} \item \Rcode{slice()} \end{itemize} \end{block} \begin{block}{} {\em Finding/counting overlaps}: \begin{itemize} \item \Rcode{findOverlaps()} \item \Rcode{countOverlaps()} \end{itemize} \end{block} \begin{block}{} and more... \end{block} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Most frequently seen low-level containers} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{\Rclass{Rle} objects} \begin{frame}[fragile] \frametitle{\Rclass{Rle} objects} \begin{block}{} \Rclass{Rle}: Run Length Encoding \end{block} \setbeamercolor{block title}{bg=green!30} \setbeamercolor{block body}{bg=green!20} \begin{block}{Supported basic operations} \begin{itemize} \item {\em Vector operations}: \YES{} \item {\em List operations}: \NO{} \item {\em Ranges operations}: \NO{} \item {\em Coercion methods}: \YES{} (to atomic vector, factor, or \Rclass{IRanges}) \item {\em Splitting}: \YES{} (produces an \Rclass{RleList} object) \end{itemize} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{Rle} objects (continued)} \begin{exampleblock}{} {\small <>= library(IRanges) set.seed(2012) rle1 <- Rle(sample(c(-0.9, 0), 20, replace=TRUE)) rle1 runLength(rle1) runValue(rle1) as.vector(rle1) rle1[c(TRUE, FALSE)] @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{Rle} objects (continued)} \begin{exampleblock}{} {\scriptsize <>= sort(rle1) rle1 * 50.1 sum(rle1) cumsum(rle1) cumsum(rle1) <= -4.2 rle1[cumsum(rle1) <= -4.2] @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{Rle} objects (continued)} \begin{exampleblock}{} {\small <>= rle2 <- Rle(c("ch1", "chMT", "ch1", "ch2", "chMT"), c(4, 2, 1, 5, 1)) rle2 as.vector(rle2) c(rle2, c("chMT", "chX")) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{Rle} objects (continued)} \begin{exampleblock}{} {\small <>= runValue(rle2) <- factor(runValue(rle2)) rle2 runValue(rle2) as.vector(rle2) as.factor(rle2) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{Rle} objects (continued)} \begin{exampleblock}{} {\small <>= rle1 == 0 as(rle1 == 0, "IRanges") @ } \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{\Rclass{IRanges} objects} \begin{frame}[fragile] \frametitle{The purpose of the \Rclass{IRanges} container is...} ... to store a set of {\em integer ranges} (aka {\em integer intervals}). \begin{block}{} \begin{itemize} \item Each range can be defined by a {\em start} and an {\em end} value: both are included in the interval (except when the range is empty). \item The {\em width} of the range is the number of integer values in it: {\em width} = {\em end} - {\em start} + 1. \item {\em end} is always >= {\em start}, except for empty ranges where {\em end} = {\em start} - 1. \end{itemize} \end{block} \setbeamercolor{block title}{bg=green!30} \setbeamercolor{block body}{bg=green!20} \begin{block}{Supported basic operations} \begin{itemize} \item {\em Vector operations}: \YES{} \item {\em List operations}: \YES{} (not covered in this presentation) \item {\em Ranges operations}: \YES{} \item {\em Coercion methods}: \YES{} (from logical or integer vector to \Rclass{IRanges}) \item {\em Splitting}: \YES{} (produces an \Rclass{IRangesList} object) \end{itemize} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{IRanges} objects (continued)} \begin{exampleblock}{} {\small <>= ir1 <- IRanges(start=c(12, -9, NA, 12), end=c(NA, 0, 15, NA), width=c(4, NA, 4, 3)) ir1 # "show" method not yet consistent with the other "show" methods (TODO) start(ir1) end(ir1) width(ir1) successiveIRanges(c(10, 5, 38), from=101) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{IRanges} objects (continued)} \begin{columns}[t] \begin{column}{0.44\textwidth} \begin{exampleblock}{} <>= ir1[-2] ir2 <- c(ir1, IRanges(-10, 0)) ir2 @ \end{exampleblock} \end{column} \begin{column}{0.46\textwidth} \begin{exampleblock}{} <>= duplicated(ir2) unique(ir2) sort(ir2) @ \end{exampleblock} \end{column} \end{columns} \end{frame} \begin{frame}[fragile] \frametitle{Ranges operations} \begin{exampleblock}{} <>= ir0 <- IRanges(start=c(7, 9, 12, 14, 22:24), end=c(15, 11, 12, 18, 26, 27, 28)) png("ranges-ir0-plot.png", width=800, height=170) plotRanges(ir0, xlim=c(5, 35), main="ir0", col="blue") dev.off() @ <>= png("ranges-shift-ir0-plot.png", width=800, height=170) plotRanges(shift(ir0, 5), xlim=c(5, 35), main="shift(ir0, 5)", col="blue") dev.off() @ <>= png("ranges-disjoin-ir0-plot.png", width=800, height=170) plotRanges(disjoin(ir0), xlim=c(5, 35), main="disjoin(ir0)", col="blue") dev.off() @ <>= png("ranges-reduce-ir0-plot.png", width=800, height=170) plotRanges(reduce(ir0), xlim=c(5, 35), main="reduce(ir0)", col="blue") dev.off() @ \begin{figure} \centering \includegraphics[width=0.8\textwidth,height=!]{ranges-ir0-plot}\\ \includegraphics[width=0.8\textwidth,height=!]{ranges-shift-ir0-plot}\\ \includegraphics[width=0.8\textwidth,height=!]{ranges-disjoin-ir0-plot}\\ \includegraphics[width=0.8\textwidth,height=!]{ranges-reduce-ir0-plot} %\caption{Ranges operations} \end{figure} \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{IRanges} objects (continued)} \begin{columns}[t] \begin{column}{0.44\textwidth} \begin{exampleblock}{} <>= ir1 @ \end{exampleblock} \end{column} \begin{column}{0.44\textwidth} \begin{exampleblock}{} <>= shift(ir1, -start(ir1)) flank(ir1, 10, start=FALSE) @ \end{exampleblock} \end{column} \end{columns} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{IRanges} objects (continued)} \begin{columns}[t] \begin{column}{0.44\textwidth} \begin{exampleblock}{} <>= ir1 @ \end{exampleblock} \begin{exampleblock}{} <>= range(ir1) reduce(ir1) @ \end{exampleblock} \end{column} \begin{column}{0.46\textwidth} \begin{exampleblock}{} <>= union(ir1, IRanges(-2, 6)) intersect(ir1, IRanges(-2, 13)) setdiff(ir1, IRanges(-2, 13)) @ \end{exampleblock} \end{column} \end{columns} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{IRanges} objects (continued)} \begin{columns}[b] \begin{column}{0.44\textwidth} \begin{exampleblock}{} <>= ir3 <- IRanges(5:1, width=12) ir3 @ \end{exampleblock} \end{column} \begin{column}{0.44\textwidth} \begin{exampleblock}{} <>= ir2 @ \end{exampleblock} \end{column} \end{columns} \begin{exampleblock}{} <>= pintersect(ir3, ir2, resolve.empty="max.start") @ \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{IRanges} objects (continued)} \begin{exampleblock}{} {\small <>= ok <- c(FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE) ir4 <- as(ok, "IRanges") # from logical vector to IRanges ir4 as(which(ok), "IRanges") # from integer vector to IRanges rle2[ir4] # IRanges subscript @ } \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{\Rclass{DataFrame} objects} \begin{frame}[fragile] \frametitle{\Rclass{DataFrame} objects} %\begin{block}{} \Rclass{DataFrame}: An S4 version of \Rcode{data.frame} that can hold almost anything it its columns. %\end{block} \setbeamercolor{block title}{bg=green!30} \setbeamercolor{block body}{bg=green!20} \begin{block}{Supported basic operations} \begin{itemize} \item All the \Rcode{data.frame} operations. Just manipulate a \Rclass{DataFrame} as a \Rcode{data.frame}! \item {\em Coercion methods}: from almost anything to \Rclass{DataFrame}, and from \Rclass{DataFrame} to \Rcode{data.frame}. \item {\em Splitting}: \YES{} (produces a \Rclass{SplitDataFrameList} object) \end{itemize} \end{block} \begin{exampleblock}{} {\small <>= library(Biostrings) dna <- DNAStringSet(c("AAA", "TGGATT", "CATTNGAGC", "TAATAG")) af <- alphabetFrequency(dna, baseOnly=TRUE) df <- DataFrame(dna, af) df df$G @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{DataFrame} objects (continued)} \begin{exampleblock}{} {\small <>= df$cds_id <- paste("CDS", 1:4, sep="") df$cds_range <- successiveIRanges(width(dna), from=51) df as.data.frame(df) @ } \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Other frequently seen low-level containers} \begin{frame}[fragile] \frametitle{\Rclass{CharacterList} objects} %\begin{block}{} An S4 virtual class for representing a list of character vectors. %\end{block} \begin{block}{} Exists in 2 flavors (i.e. 2 different internal representations): \begin{itemize} \item \Rclass{CompressedCharacterList} \item \Rclass{SimpleCharacterList} \end{itemize} \end{block} \begin{exampleblock}{} {\small <>= ccl <- CharacterList(one=c("aaa", "bb", "c"), two=c("dd", "e", "fff", "gggg")) @ } \end{exampleblock} \begin{columns}[t] \begin{column}{0.44\textwidth} \begin{exampleblock}{} {\small <>= ccl length(ccl) names(ccl) @ } \end{exampleblock} \end{column} \begin{column}{0.44\textwidth} \begin{exampleblock}{} {\small <>= as.list(ccl) ccl[[2]] @ } \end{exampleblock} \end{column} \end{columns} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{CharacterList} objects (continued)} \begin{exampleblock}{} {\small <>= toupper(ccl) elementLengths(ccl) # fast version of sapply(ccl, length) unlist(ccl) # insane! will be changed soon... unlist(ccl, use.names=FALSE) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{IntegerList} objects} \begin{block}{} An S4 virtual class for representing a list of integer vectors. \end{block} \begin{block}{} Exists in 2 flavors (i.e. 2 different internal representations): \begin{itemize} \item \Rclass{CompressedIntegerList} \item \Rclass{SimpleIntegerList} \end{itemize} \end{block} \begin{exampleblock}{} {\small <>= cil <- IntegerList(6:-2, 5, integer(0), 14:21) cil cil * cil @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{IntegerList} objects (continued)} \setbeamercolor{block title example}{bg=lightgray!40} \begin{exampleblock}{2 different ways to obtain the same result:} {\small <>= cil * 100L - 2L relist(unlist(cil) * 100L - 2L, cil) @ } \end{exampleblock} \setbeamercolor{block title example}{bg=lightgray!40} \begin{exampleblock}{But the above trick would not work here:} {\small <>= cumsum(cil) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{RleList}, \Rclass{RleViews} and \Rclass{RleViewsList} objects} \begin{block}{} Typically seen when doing {\em Coverage and slicing}. \end{block} \begin{block}{} \Rclass{RleList}: An S4 virtual class for representing a list of \Rclass{Rle} objects. Exists in 2 flavors (i.e. 2 different internal representations): \begin{itemize} \item \Rclass{CompressedRleList} \item \Rclass{SimpleRleList} \end{itemize} \end{block} \begin{block}{} \Rclass{RleViews}: An S4 class for representing a set of {\em views} (i.e. ranges) defined on an \Rclass{Rle} {\em subject}. \end{block} \begin{block}{} \Rclass{RleViewsList}: An S4 virtual class for representing a list of \Rclass{RleViews} objects. Exists only in 1 flavor: \Rclass{SimpleRleViewsList}. \end{block} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{\Rclass{GRanges} objects} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile] \frametitle{The purpose of the \Rclass{GRanges} container is...} ... to store a set of {\em genomic ranges} (aka {\em genomic regions} or {\em genomic intervals}). \begin{block}{} \begin{itemize} \item Like for \Rclass{IRanges} objects, each range can be defined by a {\em start} and an {\em end} value. \item {\em start} and {\em end} are both 1-based positions relative to the 5' end of the plus strand of the chromosome (aka {\em reference sequence}), even when the range is on the minus strand. \item The {\em start} is the leftmost position and the {\em end} is the rightmost, even when the range is on the minus strand. \item Each range is assigned a chromosome name and a strand. \end{itemize} \end{block} \setbeamercolor{block title}{bg=green!30} \setbeamercolor{block body}{bg=green!20} \begin{block}{Supported basic operations} \begin{itemize} \item {\em Vector operations}: \YES{} \item {\em List operations}: \NO{} \item {\em Ranges operations}: \YES{} \item {\em Coercion methods}: to \Rclass{RangedData} or \Rclass{IRangesList} (both not covered in this presentation) \item {\em Splitting}: \YES{} (produces a \Rclass{GRangesList} object) \end{itemize} \end{block} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{\Rclass{GRanges} constructor and accessors} \begin{frame}[fragile] \frametitle{\Rclass{GRanges} constructor} \begin{exampleblock}{} {\small <>= library(GenomicRanges) gr1 <- GRanges(seqnames=rep(c("ch1", "chMT"), c(2, 4)), ranges=IRanges(start=16:21, end=20), strand=rep(c("+", "-", "*"), 2)) gr1 @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{GRanges} accessors} \begin{exampleblock}{} {\small <>= length(gr1) seqnames(gr1) ranges(gr1) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{GRanges} accessors (continued)} \begin{exampleblock}{} {\small <>= start(gr1) end(gr1) width(gr1) strand(gr1) strand(gr1) <- c("-", "-", "+") strand(gr1) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{GRanges} accessors (continued)} \begin{exampleblock}{} {\scriptsize <>= names(gr1) <- LETTERS[1:6] names(gr1) elementMetadata(gr1) <- DataFrame(score=11:16, GC=seq(1, 0, length=6)) elementMetadata(gr1) gr1 @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{GRanges} accessors (continued)} \begin{exampleblock}{} <>= seqinfo(gr1) seqlevels(gr1) seqlengths(gr1) seqlengths(gr1) <- c(50000, 800) seqlengths(gr1) @ \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Vector operations on \Rclass{GRanges} objects} \begin{frame}[fragile] \frametitle{Vector operations on \Rclass{GRanges} objects} \begin{exampleblock}{} {\small <>= gr1[c("F", "A")] gr1[strand(gr1) == "+"] @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Vector operations on \Rclass{GRanges} objects (continued)} \begin{exampleblock}{} {\small <>= gr1 <- gr1[-5] gr1 @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Vector operations on \Rclass{GRanges} objects (continued)} \begin{exampleblock}{} {\small <>= gr2 <- GRanges(seqnames="ch2", ranges=IRanges(start=c(2:1,2), width=6), score=15:13, GC=seq(0, 0.4, length=3)) gr12 <- c(gr1, gr2) gr12 @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Vector operations on \Rclass{GRanges} objects (continued)} \begin{exampleblock}{} {\small <>= gr12[length(gr12)] == gr12 duplicated(gr12) unique(gr12) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Vector operations on \Rclass{GRanges} objects (continued)} \begin{exampleblock}{} {\small <>= sort(gr12) @ } \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Ranges operations on \Rclass{GRanges} objects} \begin{frame}[fragile] \frametitle{Ranges operations on \Rclass{GRanges} objects} \begin{exampleblock}{} {\scriptsize <>= gr2 shift(gr2, 50) narrow(gr2, start=2, end=-2) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Ranges operations on \Rclass{GRanges} objects (continued)} \begin{exampleblock}{} {\small <>= gr1 resize(gr1, 12) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Ranges operations on \Rclass{GRanges} objects (continued)} \begin{exampleblock}{} {\small <>= gr1 flank(gr1, 3) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Ranges operations on \Rclass{GRanges} objects (continued)} \begin{exampleblock}{} {\small <>= gr3 <- shift(gr1, c(35000, rep(0, 3), 100)) width(gr3)[c(3,5)] <- 117 gr3 range(gr3) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Ranges operations on \Rclass{GRanges} objects (continued)} \begin{exampleblock}{} {\scriptsize <>= gr3 disjoin(gr3) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Ranges operations on \Rclass{GRanges} objects (continued)} \begin{exampleblock}{} {\small <>= gr3 reduce(gr3) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Ranges operations on \Rclass{GRanges} objects (continued)} \begin{exampleblock}{} {\scriptsize <>= gr3 gaps(gr3) @ } \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Splitting a \Rclass{GRanges} object} \begin{frame}[fragile] \frametitle{Splitting a \Rclass{GRanges} object} \begin{exampleblock}{} {\small <>= split(gr3, seqnames(gr3)) @ } \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Exercise 1} \begin{frame}[fragile] \frametitle{Exercise 1} \setbeamertemplate{enumerate items}{\alph{enumi}.} \begin{enumerate} \item Load the \Rpackage{GenomicRanges} package. \item Open the man page for the \Rclass{GRanges} class and run the examples in it. \item Shift the ranges in \Rcode{gr} by 1000 positions to the right. \item What method is called when doing \Rcode{shift()} on a \Rclass{GRanges} object? Find the man page for this method. \end{enumerate} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{\Rclass{GRangesList} objects} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile] \frametitle{The purpose of the \Rclass{GRangesList} container is...} ... to store a list of {\em compatible} \Rclass{GRanges} objects. \begin{block}{} {\em compatible} means: \begin{itemize} \item they are relative to the same genome, \item AND they have the same columns in their \Rcode{elementMetadata} slot. \end{itemize} \end{block} \setbeamercolor{block title}{bg=green!30} \setbeamercolor{block body}{bg=green!20} \begin{block}{Supported basic operations} \begin{itemize} \item {\em Vector operations}: partially supported (no comparing or ordering) \item {\em List operations}: \YES{} \item {\em Ranges operations}: partially supported (some operations like \Rcode{disjoin()} or \Rcode{gaps()} are missing but they could/will be added) \item {\em Coercion methods}: to \Rclass{IRangesList} (not covered in this presentation) \item {\em Splitting}: \NO{} \end{itemize} \end{block} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{\Rclass{GRangesList} constructor and accessors} \begin{frame}[fragile] \frametitle{\Rclass{GRangesList} constructor} \begin{exampleblock}{} {\small <>= grl <- GRangesList(gr3, gr2) grl @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{GRangesList} accessors} \begin{exampleblock}{} <>= length(grl) @ \end{exampleblock} \begin{columns}[t] \begin{column}{0.44\textwidth} \begin{exampleblock}{} {\small <>= seqnames(grl) @ } \end{exampleblock} \end{column} \begin{column}{0.44\textwidth} \begin{exampleblock}{} {\small <>= strand(grl) @ } \end{exampleblock} \end{column} \end{columns} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{GRangesList} accessors (continued)} \begin{columns}[t] \begin{column}{0.44\textwidth} \begin{exampleblock}{} {\small <>= ranges(grl) @ } \end{exampleblock} \end{column} \begin{column}{0.44\textwidth} \begin{exampleblock}{} {\small <>= start(grl) end(grl) width(grl) @ } \end{exampleblock} \end{column} \end{columns} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{GRangesList} accessors (continued)} \begin{exampleblock}{} {\small <>= names(grl) <- c("TX1", "TX2") grl @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{GRangesList} accessors (continued)} \begin{exampleblock}{} {\scriptsize <>= elementMetadata(grl)$geneid <- c("GENE1", "GENE2") elementMetadata(grl) grl @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{GRangesList} accessors (continued)} \begin{exampleblock}{} <>= seqinfo(grl) @ \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Vector operations on \Rclass{GRangesList} objects} \begin{frame}[fragile] \frametitle{Vector operations on \Rclass{GRangesList} objects} \begin{exampleblock}{} {\small <>= grl[c("TX2", "TX1")] @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Vector operations on \Rclass{GRangesList} objects (continued)} \begin{exampleblock}{} {\scriptsize <>= c(grl, GRangesList(gr3)) @ } \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{List operations on \Rclass{GRangesList} objects} \begin{frame}[fragile] \frametitle{List operations on \Rclass{GRangesList} objects} \begin{exampleblock}{} {\scriptsize <>= grl[[2]] elementLengths(grl) unlisted <- unlist(grl, use.names=FALSE) # same as c(grl[[1]], grl[[2]]) unlisted @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{List operations on \Rclass{GRangesList} objects (continued)} \begin{exampleblock}{} {\small <>= grl100 <- relist(shift(unlisted, 100), grl) grl100 @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{List operations on \Rclass{GRangesList} objects (continued)} \begin{exampleblock}{} {\scriptsize <>= grl100b <- endoapply(grl, shift, 100) grl100b elementMetadata(grl100) elementMetadata(grl100b) @ } \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Ranges operations on \Rclass{GRangesList} objects} \begin{frame}[fragile] \frametitle{Ranges operations on \Rclass{GRangesList} objects} \begin{columns}[t] \begin{column}{0.49\textwidth} \begin{exampleblock}{} {\scriptsize <>= grl @ } \end{exampleblock} \end{column} \begin{column}{0.49\textwidth} \begin{exampleblock}{} {\scriptsize <>= shift(grl, 100) @ } \end{exampleblock} \end{column} \end{columns} \begin{block}{} \Rcode{shift(grl, 100)} is equivalent to \Rcode{endoapply(grl, shift, 100)} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{Ranges operations on \Rclass{GRangesList} objects (continued)} \begin{columns}[t] \begin{column}{0.49\textwidth} \begin{exampleblock}{} {\scriptsize <>= grl @ } \end{exampleblock} \end{column} \begin{column}{0.49\textwidth} \begin{exampleblock}{} {\scriptsize <>= flank(grl, 10) @ } \end{exampleblock} \end{column} \end{columns} \begin{block}{} \Rcode{flank(grl, 10)} is equivalent to \Rcode{endoapply(grl, flank, 10)} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{Ranges operations on \Rclass{GRangesList} objects (continued)} \begin{columns}[t] \begin{column}{0.49\textwidth} \begin{exampleblock}{} {\scriptsize <>= grl @ } \end{exampleblock} \end{column} \begin{column}{0.49\textwidth} \begin{exampleblock}{} {\scriptsize <>= range(grl) @ } \end{exampleblock} \end{column} \end{columns} \begin{block}{} \Rcode{range(grl)} is equivalent to \Rcode{endoapply(grl, range)} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{Ranges operations on \Rclass{GRangesList} objects (continued)} \begin{columns}[t] \begin{column}{0.49\textwidth} \begin{exampleblock}{} {\scriptsize <>= grl @ } \end{exampleblock} \end{column} \begin{column}{0.49\textwidth} \begin{exampleblock}{} {\scriptsize <>= reduce(grl) @ } \end{exampleblock} \end{column} \end{columns} \begin{block}{} \Rcode{reduce(grl)} is equivalent to \Rcode{endoapply(grl, reduce)} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{Ranges operations on \Rclass{GRangesList} objects (continued)} <>= grl2 <- grl grl2[[1]] <- grl2[[1]][3]; grl2[[2]] <- grl2[[2]][1] grl3 <- unname(grl2) grl3[[1]] <- narrow(unname(grl3[[1]]), start=5, end=-5) @ \begin{columns}[t] \begin{column}{0.49\textwidth} \begin{exampleblock}{} {\scriptsize <>= grl2 grl3 @ } \end{exampleblock} \end{column} \begin{column}{0.49\textwidth} \begin{exampleblock}{} {\scriptsize <>= psetdiff(grl2, grl3) @ } \end{exampleblock} \end{column} \end{columns} \begin{block}{} \Rcode{psetdiff(grl2, grl)} is equivalent to \Rcode{mendoapply(setdiff, grl2, grl)} \end{block} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{\Rclass{GappedAlignments} and \Rclass{GappedAlignmentPairs} objects} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{\Rclass{GappedAlignments}} \begin{frame}[fragile] \frametitle{The purpose of the \Rclass{GappedAlignments} container is...} ... to store a set of genomic alignments (aligned reads, typically). \begin{block}{} The alignments can be loaded from a BAM file with \Rcode{readGappedAlignments()}. By default, only the following information is loaded for each alignment: \begin{itemize} \item RNAME field: name of the reference sequence to which the query is aligned. \item strand bit (from FLAG field): strand in the reference sequence to which the query is aligned. \item CIGAR field: a string in the "Extended CIGAR format" describing the "gemoetry" of the alignment (i.e. locations of insertions, deletions and gaps). See the SAM Spec for the details. \item POS field: 1-based position of the leftmost mapped base. \end{itemize} In particular, the query sequences (SEQ) and qualities (QUAL) are not loaded by default. \end{block} \setbeamercolor{block title}{bg=green!30} \setbeamercolor{block body}{bg=green!20} \begin{block}{Supported basic operations} \begin{itemize} \item {\em Vector} operations: partially supported (no comparing or ordering) \item {\em List} operations: \NO{} \item {\em Ranges} operations: only \Rcode{narrow()} and \Rcode{qnarrow()} (\Rclass{GappedAlignments} specific) are supported \item {\em Coercion methods}: to \Rclass{GRanges} or \Rclass{GRangesList} \item {\em Splitting}: \NO{} \end{itemize} \end{block} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{\Rclass{GappedAlignments} constructor and accessors} \begin{frame}[fragile] \frametitle{\Rclass{GappedAlignments} constructor} Typically not used directly! \begin{exampleblock}{} {\small <>= gal0 <- GappedAlignments(seqnames=Rle(c("ch1", "ch2"), c(3, 1)), pos=1L + 10L*0:3, cigar=c("36M", "20M3D16M", "20M703N16M", "14M2I20M"), strand=strand(c("+", "-", "-", "+"))) gal0 @ } \end{exampleblock} An N in the cigar indicates a gap (!= deletion). \end{frame} \begin{frame}[fragile] \frametitle{\Rcode{readGappedAlignments()}} \begin{exampleblock}{} {\small <>= library(pasillaBamSubset) U1gal <- readGappedAlignments(untreated1_chr4()) length(U1gal) head(U1gal) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{GappedAlignments} accessors} \begin{exampleblock}{} {\scriptsize <>= seqnames(U1gal) table(as.factor(seqnames(U1gal))) strand(U1gal) table(as.factor(strand(U1gal))) head(cigar(U1gal)) head(qwidth(U1gal)) table(qwidth(U1gal)) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{GappedAlignments} accessors (continued)} \begin{columns}[t] \begin{column}{0.38\textwidth} \begin{exampleblock}{} {\small <>= head(start(U1gal)) head(end(U1gal)) head(width(U1gal)) head(ngap(U1gal)) table(ngap(U1gal)) @ } \end{exampleblock} \end{column} \begin{column}{0.52\textwidth} \begin{exampleblock}{} {\small <>= elementMetadata(U1gal) seqinfo(U1gal) @ } \end{exampleblock} \end{column} \end{columns} \end{frame} \begin{frame}[fragile] \frametitle{Loading additional information from the BAM file} \begin{exampleblock}{} {\small <>= param <- ScanBamParam(what=c("flag", "mapq"), tag=c("NH", "NM")) U1gal <- readGappedAlignments(untreated1_chr4(), use.names=TRUE, param=param) U1gal[1:5] any(duplicated(names(U1gal))) @ } \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Exercise 2} \begin{frame}[fragile] \frametitle{Exercise 2} \setbeamertemplate{enumerate items}{\alph{enumi}.} \begin{enumerate} \item Find the SAM Spec online and investigate the meaning of predefined tags NH and NM. \item Load BAM file untreated1\_chr4.bam into a \Rclass{GappedAlignments} object and subset this object to keep only the alignments satisfying the 2 following conditions: \begin{itemize} \item The alignment corresponds to a query with a {\em unique alignment} (aka {\em unique match} or {\em unique hit}). \item The alignment is a {\em perfect match} (i.e. no insertion, no deletion, no mismatch). \end{itemize} \item Do those alignments have gaps? \end{enumerate} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{From \Rclass{GappedAlignments} to \Rclass{GRanges} or \Rclass{GRangesList}} \begin{frame}[fragile] \frametitle{From \Rclass{GappedAlignments} to \Rclass{GRanges}} GAPS ARE IGNORED! That is, each alignment is converted into a {\em single} genomic range defined by the {\em start} and {\em end} of the alignment. \begin{exampleblock}{} {\scriptsize <>= as(U1gal, "GRanges") @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{From \Rclass{GappedAlignments} to \Rclass{GRangesList}} GAPS ARE NOT IGNORED! That is, each alignment is converted into one or more genomic ranges (one more range than the number of gaps in the alignment). \begin{exampleblock}{} {\small <>= U1grl <- as(U1gal, "GRangesList") U1grl @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{From \Rclass{GappedAlignments} to \Rclass{GRangesList} (continued)} One more range than the number of gaps in the alignment: \begin{exampleblock}{} {\small <>= all(elementLengths(U1grl) == ngap(U1gal) + 1) @ } \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{\Rclass{GappedAlignmentPairs}} \begin{frame}[fragile] \frametitle{The purpose of the \Rclass{GappedAlignmentPairs} container is...} ... to store a set of aligned {\em paired-end} reads. \begin{block}{} \begin{itemize} \item Implemented on top of the \Rclass{GappedAlignments} class. \item The alignments can be loaded from a BAM file with \Rcode{readGappedAlignmentPairs()}. \item \Rcode{first(x)}, \Rcode{last(x)}: extract the {\it first} and {\it last} ends in 2 separate \Rclass{GappedAlignments} objects of the same length. \end{itemize} \end{block} \setbeamercolor{block title}{bg=green!30} \setbeamercolor{block body}{bg=green!20} \begin{block}{Supported basic operations} \begin{itemize} \item {\em Vector} operations: partially supported (no comparing or ordering) \item {\em List} operations: \YES{} \item {\em Ranges} operations: \NO{} \item {\em Coercion methods}: to \Rclass{GRanges} or \Rclass{GRangesList} \item {\em Splitting}: \NO{} \end{itemize} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{\Rcode{readGappedAlignmentPairs()}} \begin{exampleblock}{} {\small <>= library(pasillaBamSubset) U3galp <- readGappedAlignmentPairs(untreated3_chr4()) length(U3galp) head(U3galp) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{GappedAlignmentPairs} accessors} \begin{exampleblock}{} {\scriptsize <>= head(first(U3galp)) head(last(U3galp)) @ } \end{exampleblock} Currently, \Rcode{readGappedAlignmentPairs()} drops pairs where the {\it first} and {\it last} ends have incompatible sequence names and/or strands (a rare situation). \end{frame} \begin{frame}[fragile] \frametitle{\Rclass{GappedAlignmentPairs} accessors (continued)} \begin{exampleblock}{} {\small <>= seqnames(U3galp) strand(U3galp) head(ngap(U3galp)) table(ngap(U3galp)) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{From \Rclass{GappedAlignmentPairs} to \Rclass{GRangesList}} \begin{exampleblock}{} {\small <>= U3grl <- as(U3galp, "GRangesList") U3grl @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{From \Rclass{GappedAlignmentPairs} to \Rclass{GRangesList} (continued)} \begin{exampleblock}{} {\scriptsize <>= U3grl[ngap(U3galp) != 0] @ } \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Advanced operations} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Coverage and slicing} \begin{frame}[fragile] \frametitle{Coverage} \begin{exampleblock}{} {\scriptsize <>= U1cvg <- coverage(U1grl) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Coverage (continued)} \begin{exampleblock}{} {\small <>= mean(U1cvg) max(U1cvg) @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{Slicing the coverage} \begin{exampleblock}{} {\small <>= U1sl <- slice(U1cvg, lower=10) U1sl elementLengths(U1sl) head(U1sl$chr4) head(mean(U1sl$chr4)) head(max(U1sl$chr4)) @ } \end{exampleblock} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Finding/counting overlaps} \begin{frame}[fragile] \frametitle{Finding/counting overlaps} \begin{block}{} A typical use case: count the number of {\em hits} (aka {\em overlaps}) per transcript. \end{block} \begin{block}{} Typical input: \begin{itemize} \item A BAM file with the aligned reads ({\em single-} or {\em paired-end}). \item Transcript annotations {\em for the same reference genome} that was used to align the reads. \end{itemize} \end{block} \begin{block}{} Typical tools: \begin{itemize} \item \Rcode{readGappedAlignments()} or \Rcode{readGappedAlignmentPairs()} to load the reads in a \Rclass{GappedAlignments} or \Rclass{GappedAlignmentPairs} object. \item A \Rclass{TranscriptDb} object containing the transcript annotations. \item The \Rcode{exonsBy()} extractor (defined in the \Rpackage{GenomicFeatures} package) to extract the exons ranges grouped by transcript from the \Rclass{TranscriptDb} object. The exons ranges are returned in a \Rclass{GRangesList} object with 1 top-level element per transcript. \item The \Rcode{findOverlaps()} and/or \Rcode{countOverlaps()} functions. \end{itemize} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{Load the transcripts} \begin{exampleblock}{} {\small <>= library(TxDb.Dmelanogaster.UCSC.dm3.ensGene) txdb <- TxDb.Dmelanogaster.UCSC.dm3.ensGene exbytx <- exonsBy(txdb, by="tx", use.names=TRUE) exbytx @ } \end{exampleblock} \end{frame} \begin{frame}[fragile] \frametitle{{\em Single-end} overlaps} \begin{exampleblock}{} {\small <>= U1txhits <- countOverlaps(exbytx, U1grl) length(U1txhits) head(U1txhits) sum(U1txhits) # total nb of hits head(sort(U1txhits, decreasing=TRUE)) @ } \end{exampleblock} \begin{block}{} Rough counting! \begin{itemize} \item More than 1 alignment per read can be reported in the BAM file (sometimes the same read hits the same transcript many times). \item A hit is counted even if it's not {\em compatible} with the splicing of the transcript. \end{itemize} \end{block} \end{frame} \begin{frame}[fragile] \frametitle{{\em Paired-end} overlaps} \begin{exampleblock}{} {\small <>= U3txhits <- countOverlaps(exbytx, U3grl) length(U3txhits) head(U3txhits) sum(U3txhits) # total nb of hits head(sort(U3txhits, decreasing=TRUE)) @ } \end{exampleblock} Note that exons that fall within the {\it inter-read} gap are NOT considered to overlap. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Exercise 3} \begin{frame}[fragile] \frametitle{Exercise 3} \setbeamertemplate{enumerate items}{\alph{enumi}.} Use the \Rpackage{TxDb.Dmelanogaster.UCSC.dm3.ensGene} package and the result of Exercise 2 to count the number of {\em unique hits} per transcript, that is, the number of hits from reads with a {\em unique alignment}. \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Final notes} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile] \frametitle{Final notes} \begin{block}{} Further developments: \begin{itemize} \item Some optimization to the pairing algorithm used by \Rcode{readGappedAlignmentPairs()}. \item Convenience functions for extracting the {\it inter-read gap} and computing the {\it observed template length} (aka TLEN in BAM/SAM jargon). \item Facilities for detecting/counting hits (from {\em single-} or {\em paired-end} reads) that are {\em compatible} with the splicing of the transcript. \item On user request... \end{itemize} \end{block} \begin{block}{} Resources: \begin{itemize} \item Vignettes in \Rpackage{GenomicRanges} (\Rcode{browseVignettes("GenomicRanges")}). \item \Rclass{GRanges}, \Rclass{GRangesList}, \Rclass{GappedAlignments}, and \Rclass{GappedAlignmentPairs} man pages in \Rpackage{GenomicRanges}. \item SAMtools website: \url{http://samtools.sourceforge.net/} \item \Bioconductor{} mailing lists: \url{http://bioconductor.org/help/mailing-list/} \end{itemize} \end{block} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{document}