\name{callBindingSites-methods} \docType{methods} \alias{callBindingSites-methods} \alias{callBindingSites} \alias{callBindingSites,ANY-method} \alias{callBindingSites,character-method} \alias{callBindingSites,matrix-method} \alias{callBindingSites,ReadCounts-method} \title{Predict protein binding sites from high-throughput sequencing data} \description{ Methods for function \code{callBindingSites} in Package `ChIPseqR'. These methods are used to identify protein binding sites from ChIP-seq data. } \section{Methods}{ \describe{ \item{data = "ANY"}{Default method to handle all forms of input not explicitly handled by their own method. In particular this will be used for objects of class \code{\link[ShortRead:AlignedRead]{AlignedRead}} and \code{data.frame} but it will handle any class for which a \code{\link{strandPileup}} method is available.} \item{data = "character"}{Allows a file name referring to a file of mapped sequence reads to be used as input.} \item{data = "matrix"}{Uses a matrix of read counts (for a single chromosome) as input.} \item{data = "ReadCounts"}{This method implements the peak calling algorithm. Other methods will typically reformat their input and pass it on to this method.} }} \usage{ \S4method{callBindingSites}{ANY}(data, chrLen, plot=TRUE, verbose=TRUE, ..., plotTo) \S4method{callBindingSites}{character}(data, type, minQual=70, ...) \S4method{callBindingSites}{matrix}(data, chrName="chr", ...) \S4method{callBindingSites}{ReadCounts}(data, bind, support, background, bgCutoff=0.9, supCutoff=0.9, fdr = 0.05, extend=1, tailCut=0.95, piLambda=0.5, adapt=FALSE, corSummary=median, compress = TRUE, digits = 16, plot=TRUE, verbose=TRUE, ask=FALSE, plotTo, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{data}{Either an object containing information about mapped reads or a list. 
See below for details.} \item{bind}{Length of binding region to use (see Details).} \item{support}{Length of support region to use (see Details).} \item{background}{Length of background window. If this is missing it will be set to 10*(\code{bind}+2*\code{support}).} \item{chrLen}{Numeric vector indicating the length of all chromosomes. Only needed when \code{data} is an \code{\link[ShortRead:AlignedRead]{AlignedRead}} object. \code{\link[ShortRead:readBfaToc]{readBfaToc}} may be used to supply this information.} \item{bgCutoff}{Numeric value between 0.5 and 1. This determines how much estimates of the background read density are allowed to vary for adjacent windows. Set to 1 to disable cutoff.} \item{supCutoff}{Numeric value between 0.5 and 1. This determines how much estimates of the support region read density are allowed to vary for forward and reverse strand. Set to 1 to disable cutoff.} \item{fdr}{Target false discovery rate.} \item{extend}{Numeric value indicating how far mapped reads should be extended when calculating read counts.} \item{type}{Format of alignment file (see \code{\link[ShortRead:readAligned]{readAligned}} for details).} \item{minQual}{Minimum alignment quality to use. All reads with lower alignment quality are discarded.} \item{tailCut}{Truncation point used to exclude outliers when estimating null distribution.} \item{chrName}{Name to use for the single chromosome.} \item{piLambda}{If \code{adapt=TRUE} this parameter is used to estimate the proportion of scores not related to binding sites.} \item{adapt}{Logical indicating whether an adaptive false discovery rate should be used. If this is \code{FALSE} (the default) the usual Benjamini-Hochberg procedure is used to control the FDR.} \item{corSummary}{Function used to summarise cross-correlation across chromosomes. See the Details section on binding and support region. 
} \item{compress}{Logical indicating whether the return value should be compressed.} \item{digits}{Number of decimal places to retain for binding site score for compression.} \item{plot}{Logical. If \code{plot=TRUE} (the default) some diagnostic plots are produced during the analysis.} \item{verbose}{Logical. If \code{verbose=TRUE} (the default) status messages are printed to indicate progress.} \item{ask}{Logical. Setting this to \code{TRUE} causes the system to wait for user input before displaying a new plot. See \code{\link[grDevices:devAskNewPage]{devAskNewPage}}.} \item{plotTo}{Character string giving the name of a file that should be used to store plots generated during the analysis. If this is not missing a PDF file with the given name will be created.} \item{\dots}{Additional arguments. Most methods pass them on to the \code{ReadCounts} method.} } \details{ The length of binding and support regions can either be given as a single value or as a range of possible values (by providing the minimum and maximum). In the latter case the cross-correlation between read counts on forward and reverse strand will be used to determine a value within that range. Note that this may lead to sub-optimal choices of binding and support region length. } \value{ An object of class \code{\linkS4class{BindScore}} if \code{compress = FALSE}, otherwise an object of class \code{\linkS4class{RLEBindScore}}. } \seealso{\code{\link{simpleNucCall}} for an interface with nucleosome specific defaults. This function uses \code{\link{strandPileup}}, \code{\link{startScore}}, \code{\link{getCutoff}} and \code{\link{pickPeak}}. See the help pages of these functions for additional detail on the individual steps involved. 
See \code{\link{getBindLen}} for details on the estimation of binding and support region length.} \examples{ set.seed(1) ## determine binding site locations b <- sample(1:1e6, 5000) ## sample read locations fwd <- unlist(lapply(b, function(x) sample((x-83):(x-73), 20, replace=TRUE))) rev <- unlist(lapply(b, function(x) sample((x+73):(x+83), 20, replace=TRUE))) ## add some background noise fwd <- c(fwd, sample(1:(1e6-25), 50000)) rev <- c(rev, sample(25:1e6, 50000)) ## create data.frame with read positions as input to strandPileup reads <- data.frame(chromosome="chr1", position=c(fwd, rev), length=25, strand=factor(rep(c("+", "-"), times=c(150000, 150000)))) ## create object of class ReadCounts readPile <- strandPileup(reads, chrLen=1e6, extend=1, plot=FALSE) ## predict binding site locations ## the artificial dataset is very small so predictions may not be very reliable bindScore <- callBindingSites(readPile, bind=147, support=20, background=2000, plot=FALSE) } \keyword{methods} \keyword{models} \keyword{htest}