\name{align-utils} \alias{align-utils} \alias{mismatch,AlignedXStringSet0,missing-method} \alias{nmatch,PairwiseAlignedXStringSet,missing-method} \alias{nmatch,PairwiseAlignedFixedSubjectSummary,missing-method} \alias{nmismatch,AlignedXStringSet0,missing-method} \alias{nmismatch,PairwiseAlignedXStringSet,missing-method} \alias{nmismatch,PairwiseAlignedFixedSubjectSummary,missing-method} \alias{nedit} \alias{nedit,PairwiseAlignedXStringSet-method} \alias{nedit,PairwiseAlignedFixedSubjectSummary-method} \alias{mismatchTable} \alias{mismatchTable,AlignedXStringSet0-method} \alias{mismatchTable,QualityAlignedXStringSet-method} \alias{mismatchTable,PairwiseAlignedXStringSet-method} \alias{mismatchSummary} \alias{mismatchSummary,AlignedXStringSet0-method} \alias{mismatchSummary,QualityAlignedXStringSet-method} \alias{mismatchSummary,PairwiseAlignedFixedSubject-method} \alias{mismatchSummary,PairwiseAlignedFixedSubjectSummary-method} \alias{coverage,AlignedXStringSet0-method} \alias{coverage,PairwiseAlignedFixedSubject-method} \alias{coverage,PairwiseAlignedFixedSubjectSummary-method} \alias{compareStrings} \alias{compareStrings,character,character-method} \alias{compareStrings,XString,XString-method} \alias{compareStrings,XStringSet,XStringSet-method} \alias{compareStrings,AlignedXStringSet0,AlignedXStringSet0-method} \alias{compareStrings,PairwiseAlignedXStringSet,missing-method} \alias{consensusMatrix,PairwiseAlignedFixedSubject-method} \title{Utility functions related to sequence alignment} \description{ A variety of different functions used to deal with sequence alignments. 
} \usage{ nedit(x) # also nmatch and nmismatch mismatchTable(x, shiftLeft=0L, shiftRight=0L, \dots) mismatchSummary(x, \dots) \S4method{coverage}{AlignedXStringSet0}(x, shift=0L, width=NULL, weight=1L) \S4method{coverage}{PairwiseAlignedFixedSubject}(x, shift=0L, width=NULL, weight=1L) compareStrings(pattern, subject) \S4method{consensusMatrix}{PairwiseAlignedFixedSubject}(x, as.prob=FALSE, shift=0L, width=NULL, baseOnly=FALSE, gapCode="-", endgapCode="-") } \details{ \code{mismatchTable}: a data.frame containing the positions and substrings of the mismatches for the \code{AlignedXStringSet} or \code{PairwiseAlignedXStringSet} object. \code{mismatchSummary}: a list of data.frame objects containing counts and frequencies of the mismatches for the \code{AlignedXStringSet} or \code{PairwiseAlignedFixedSubject} object. \code{compareStrings} combines two equal-length strings that are assumed to be aligned into a single character string that replaces mismatches with \code{"?"}, insertions with \code{"+"}, and deletions with \code{"-"}. } \arguments{ \item{x}{ A \code{character} vector or matrix, \code{XStringSet}, \code{XStringViews}, \code{PairwiseAlignedXStringSet}, or \code{list} of FASTA records containing the equal-length strings. } \item{shiftLeft, shiftRight}{ A non-positive and a non-negative integer, respectively, that specify how many characters preceding and following the mismatch position to include in the mismatch substrings. } \item{\dots}{ Further arguments to be passed to or from other methods. } \item{shift, width}{ See \code{?\link[IRanges]{coverage}}. } \item{weight}{ An integer vector specifying how much each element in \code{x} counts. } \item{pattern, subject}{ The strings to compare. Can be of type \code{character}, \code{XString}, \code{XStringSet}, \code{AlignedXStringSet}, or, in the case of \code{pattern}, \code{PairwiseAlignedXStringSet}. 
If \code{pattern} is a \code{PairwiseAlignedXStringSet} object, then \code{subject} must be missing. } \item{as.prob}{ If \code{TRUE} then probabilities are reported, otherwise counts (the default). } \item{baseOnly}{ \code{TRUE} or \code{FALSE}. If \code{TRUE}, the returned matrix only contains frequencies for the letters in the "base" alphabet i.e. "A", "C", "G", "T" if \code{x} is a "DNA input", and "A", "C", "G", "U" if \code{x} is an "RNA input". When \code{x} is a \link{BString} object (or an \link{XStringViews} object with a \link{BString} subject, or a \link{BStringSet} object), then the \code{baseOnly} argument is ignored. } \item{gapCode, endgapCode}{ The codes in the appropriate \code{\link{alphabet}} to use for the internal and end gaps. } } \seealso{ \code{\link{pairwiseAlignment}}, \code{\link{consensusMatrix}}, \link{XString-class}, \link{XStringSet-class}, \link{XStringViews-class}, \link{AlignedXStringSet-class}, \link{PairwiseAlignedXStringSet-class}, \link{match-utils} } \examples{ ## Compare two globally aligned strings string1 <- "ACTTCACCAGCTCCCTGGCGGTAAGTTGATC---AAAGG---AAACGCAAAGTTTTCAAG" string2 <- "GTTTCACTACTTCCTTTCGGGTAAGTAAATATATAAATATATAAAAATATAATTTTCATC" compareStrings(string1, string2) ## Create a consensus matrix nw1 <- pairwiseAlignment(AAStringSet(c("HLDNLKGTF", "HVDDMPNAL")), AAString("SMDDTEKMSMKL"), substitutionMatrix = "BLOSUM50", gapOpening = -3, gapExtension = -1) consensusMatrix(nw1) ## Examine the consensus between the bacteriophage phi X174 genomes data(phiX174Phage) phageConsmat <- consensusMatrix(phiX174Phage, baseOnly = TRUE) phageDiffs <- which(apply(phageConsmat, 2, max) < length(phiX174Phage)) phageDiffs phageConsmat[,phageDiffs] } \keyword{methods}