\name{align-utils}

\alias{align-utils}

\alias{mismatch,AlignedXStringSet,missing-method}
\alias{nmatch,PairwiseAlignedFixedSubject,missing-method}
\alias{nmatch,PairwiseAlignedFixedSubjectSummary,missing-method}
\alias{nmismatch,AlignedXStringSet,missing-method}
\alias{nmismatch,PairwiseAlignedFixedSubject,missing-method}
\alias{nmismatch,PairwiseAlignedFixedSubjectSummary,missing-method}

\alias{mismatchTable}
\alias{mismatchTable,AlignedXStringSet-method}
\alias{mismatchTable,QualityAlignedXStringSet-method}
\alias{mismatchTable,PairwiseAlignedFixedSubject-method}
\alias{mismatchSummary}
\alias{mismatchSummary,AlignedXStringSet-method}
\alias{mismatchSummary,QualityAlignedXStringSet-method}
\alias{mismatchSummary,PairwiseAlignedFixedSubject-method}
\alias{mismatchSummary,PairwiseAlignedFixedSubjectSummary-method}

\alias{coverage,AlignedXStringSet-method}
\alias{coverage,PairwiseAlignedFixedSubject-method}
\alias{coverage,PairwiseAlignedFixedSubjectSummary-method}

\alias{compareStrings}
\alias{compareStrings,character,character-method}
\alias{compareStrings,XString,XString-method}
\alias{compareStrings,XStringSet,XStringSet-method}
\alias{compareStrings,AlignedXStringSet,AlignedXStringSet-method}
\alias{compareStrings,PairwiseAlignedFixedSubject,missing-method}
\alias{consensusMatrix}
\alias{consensusMatrix,character-method}
\alias{consensusMatrix,matrix-method}
\alias{consensusMatrix,list-method}
\alias{consensusMatrix,XStringSet-method}
\alias{consensusMatrix,XStringViews-method}
\alias{consensusMatrix,PairwiseAlignedFixedSubject-method}
\alias{consensusString}
\alias{consensusString,ANY-method}

% Deprecated:
\alias{consmat}
\alias{consmat,ANY-method}


\title{Utility functions related to sequence alignment}

\description{
  A variety of functions for working with sequence alignments.
}

\usage{
  mismatchTable(x, shiftLeft=0L, shiftRight=0L, \dots)
  mismatchSummary(x, \dots)
  \S4method{coverage}{AlignedXStringSet}(x, start=NA, end=NA, weight=1L)
  \S4method{coverage}{PairwiseAlignedFixedSubject}(x, start=NA, end=NA, weight=1L)
  compareStrings(pattern, subject)
  \S4method{consensusMatrix}{character}(x, freq=FALSE)
  \S4method{consensusMatrix}{XStringSet}(x, baseOnly=FALSE, freq=FALSE)
  consensusString(x)
}

\details{
  \code{mismatchTable} returns a data.frame containing the positions and
  substrings of the mismatches for the \code{AlignedXStringSet} or
  \code{PairwiseAlignedFixedSubject} object.

  \code{mismatchSummary} returns a list of data.frame objects containing counts
  and frequencies of the mismatches for the \code{AlignedXStringSet} or
  \code{PairwiseAlignedFixedSubject} object.

  \code{compareStrings} combines two equal-length strings that are assumed to be aligned
  into a single character string that replaces mismatches with \code{"?"},
  insertions with \code{"+"}, and deletions with \code{"-"}.

  \code{consensusMatrix} computes a consensus matrix for a set of equal-length strings that
  are assumed to be aligned.

  \code{consensusString} creates the string based on a 50\% + 1 vote from the consensus
  matrix with unknowns labeled with \code{"?"}.
}

\arguments{
  \item{x}{
    A \code{character} vector or matrix, \code{XStringSet}, \code{XStringViews},
    \code{PairwiseAlignedFixedSubject}, or \code{list} of FASTA records containing the equal-length
    strings.
  }
  \item{shiftLeft, shiftRight}{
    Non-positive and non-negative integers, respectively, that specify how many
    characters preceding and following the mismatch position to include in the
    mismatch substrings.
  }
  \item{\dots}{
    Further arguments to be passed to or from other methods.
  }
  \item{start, end}{
    See \code{?\link[IRanges]{coverage}}.
  }
  \item{weight}{
    An integer vector specifying how much each element in \code{x} counts.
  }
  \item{pattern, subject}{
    The strings to compare. Can be of type \code{character}, \code{XString},
    \code{XStringSet}, \code{AlignedXStringSet}, or, in the case of
    \code{pattern}, \code{PairwiseAlignedFixedSubject}. If \code{pattern} is a
    \code{PairwiseAlignedFixedSubject} object, then \code{subject} must be missing.
  }
  \item{baseOnly}{
    \code{TRUE} or \code{FALSE}.
    If \code{TRUE}, the returned matrix only contains counts or frequencies for
    the letters in the "base" alphabet, i.e. "A", "C", "G", "T" if \code{x}
    is a "DNA input", and "A", "C", "G", "U" if \code{x} is an "RNA input".
    When \code{x} is a \link{BString} object (or an \link{XStringViews}
    object with a \link{BString} subject, or a \link{BStringSet} object),
    then the \code{baseOnly} argument is ignored.
  }
  \item{freq}{
    If \code{TRUE}, then letter frequencies (per position) are reported, otherwise counts.
  }
}

\seealso{
  \code{\link{pairwiseAlignment}},
  \link{XString-class}, \link{XStringSet-class}, \link{XStringViews-class},
  \link{AlignedXStringSet-class}, \link{PairwiseAlignedFixedSubject-class},
  \link{match-utils}
}

\examples{
  ## Compare two globally aligned strings.
  ## Both strings have the same length (60 characters); the "-" runs in
  ## string1 presumably mark alignment gaps.
  string1 <- "ACTTCACCAGCTCCCTGGCGGTAAGTTGATC---AAAGG---AAACGCAAAGTTTTCAAG"
  string2 <- "GTTTCACTACTTCCTTTCGGGTAAGTAAATATATAAATATATAAAAATATAATTTTCATC"
  compareStrings(string1, string2)

  ## Create a consensus matrix.
  ## Two amino acid sequences are aligned against a single AAString, and
  ## the consensus matrix is then computed from the alignment object.
  nw1 <-
    pairwiseAlignment(AAStringSet(c("HLDNLKGTF", "HVDDMPNAL")), AAString("SMDDTEKMSMKL"),
      substitutionMatrix = "BLOSUM50", gapOpening = -3, gapExtension = -1)
  consensusMatrix(nw1)

  ## Examine the consensus between the bacteriophage phi X174 genomes.
  ## phageDiffs holds the columns of the consensus matrix whose largest
  ## entry is smaller than the number of genomes, i.e. the columns where
  ## no single row accounts for every genome.
  data(phiX174Phage)
  phageConsmat <- consensusMatrix(phiX174Phage, baseOnly = TRUE)
  phageDiffs <- which(apply(phageConsmat, 2, max) < length(phiX174Phage))
  phageDiffs
  phageConsmat[,phageDiffs]

  ## Read in ORF data from the FASTA file shipped in the extdata
  ## directory of the Biostrings package
  file <- system.file("extdata", "someORF.fa", package="Biostrings")
  orf <- read.DNAStringSet(file, "fasta")

  ## To illustrate, the following example assumes the ORF data
  ## to be aligned for the first 10 positions (patently false):
  orf10 <- DNAStringSet(orf, end=10)
  ## Per-position letter frequencies (freq=TRUE) rather than counts
  consensusMatrix(orf10, baseOnly=TRUE, freq=TRUE)
  ## Consensus string of the first five sequences after sorting
  consensusString(sort(orf10)[1:5])

  ## For the character matrix containing the "exploded" representation
  ## of the strings, do:
  as.matrix(orf10, use.names=FALSE)
}

\keyword{methods}