## ----objectCreation, message=FALSE--------------------------------------------
library(Biostrings)
## Read the two example alignments that ship with the Biostrings package.
## `mustWork = TRUE` makes system.file() stop with a clear error when a
## bundled file cannot be located, instead of returning "" and leaving the
## reader to fail on an empty path.

## DNA alignment in ClustalW format.
origMAlign <-
  readDNAMultipleAlignment(filepath =
                           system.file("extdata",
                                       "msx2_mRNA.aln",
                                       package = "Biostrings",
                                       mustWork = TRUE),
                           format = "clustal")

## Alignment in PHYLIP format, read with the amino-acid reader.
phylipMAlign <-
  readAAMultipleAlignment(filepath =
                          system.file("extdata",
                                      "Phylip.txt",
                                      package = "Biostrings",
                                      mustWork = TRUE),
                          format = "phylip")

## ----renameRows---------------------------------------------------------------
## Show the current row names, replace them with eight short labels,
## then print the relabelled alignment.
rownames(origMAlign)
rownames(origMAlign) <- c(
  "Human", "Chimp", "Cow", "Mouse",
  "Rat", "Dog", "Chicken", "Salmon"
)
origMAlign

## ----detail, eval=FALSE-------------------------------------------------------
# detail(origMAlign)

## ----usingMasks---------------------------------------------------------------
## Experiment on a copy; origMAlign is reused unmasked by later chunks.
maskTest <- origMAlign

## Set a row mask covering rows 1 through 3, then display the mask and
## the alignment.
rowmask(maskTest) <- IRanges(start = 1, end = 3)
rowmask(maskTest)
maskTest

## Set a column mask made of two ranges: 1-500 and 1000-2343.
## NOTE(review): 2343 is presumably the last column of this alignment -
## confirm against ncol(origMAlign).
colmask(maskTest) <- IRanges(start = c(1, 1000), end = c(500, 2343))
colmask(maskTest)
maskTest

## ----nullOut masks------------------------------------------------------------
## Assign NULL to the row mask and to the column mask, displaying each
## mask right after it is reset, then print the alignment.
rowmask(maskTest) <- NULL
rowmask(maskTest)
colmask(maskTest) <- NULL
colmask(maskTest)
maskTest

## ----invertMask---------------------------------------------------------------
## Set the masks again, this time with invert = TRUE and the ranges that
## the usingMasks chunk left uncovered (rows 4-8, columns 501-999).
## NOTE(review): presumably this reproduces the same masks as that chunk -
## confirm against the printed output.
rowmask(maskTest, invert = TRUE) <- IRanges(start = 4, end = 8)
rowmask(maskTest)
maskTest

colmask(maskTest, invert = TRUE) <- IRanges(start = 501, end = 999)
colmask(maskTest)
maskTest

## ----setup--------------------------------------------------------------------
## First, clear both masks so that the next chunk starts from an
## unmasked alignment.
rowmask(maskTest) <- NULL
colmask(maskTest) <- NULL

## ----appendMask---------------------------------------------------------------
## Demonstrate the `append` argument of the row-mask setter. The alignment
## is printed after every step so the effect of each mode can be compared.

## Starting point: a row mask on rows 1-3 (no `append` given).
rowmask(maskTest) <- IRanges(start = 1, end = 3)
maskTest

## append = "intersect" with rows 2-5.
rowmask(maskTest, append = "intersect") <- IRanges(start = 2, end = 5)
maskTest

## append = "replace" with rows 5-8.
rowmask(maskTest, append = "replace") <- IRanges(start = 5, end = 8)
maskTest

## append = "replace" combined with invert = TRUE, same rows 5-8.
rowmask(maskTest, append = "replace", invert = TRUE) <-
  IRanges(start = 5, end = 8)
maskTest

## append = "union" with rows 7-8.
rowmask(maskTest, append = "union") <- IRanges(start = 7, end = 8)
maskTest

## ----maskMotif----------------------------------------------------------------
## Apply maskMotif() to the original alignment with the motif "TATA",
## then show the column mask of the result.
tataMasked <- maskMotif(origMAlign, motif = "TATA")
colmask(tataMasked)

## ----maskGaps-----------------------------------------------------------------
## Apply maskGaps() to the original alignment with min.fraction = 0.5 and
## min.block.width = 4, then print the masked alignment. autoMasked is
## reused by the later chunks.
autoMasked <- maskGaps(
  origMAlign,
  min.fraction = 0.5,
  min.block.width = 4
)
autoMasked

## ----asmatrix-----------------------------------------------------------------
## Convert the unmasked and the gap-masked alignment to matrices and
## print the dimensions of each so they can be compared.
## `<-` is used for assignment, consistent with the rest of this script.
full <- as.matrix(origMAlign)
dim(full)
partial <- as.matrix(autoMasked)
dim(partial)

## ----alphabetFreq-------------------------------------------------------------
## Print the alphabet frequency table for the gap-masked alignment.
alphabetFrequency(autoMasked)

## ----consensus----------------------------------------------------------------
## Consensus summaries of the gap-masked alignment:
## columns 84-90 of the base-only consensus matrix,
consensusMatrix(autoMasked, baseOnly = TRUE)[, 84:90]
## characters 80-130 of the consensus string,
substr(consensusString(autoMasked), start = 80, stop = 130)
## and the consensus views.
consensusViews(autoMasked)

## ----cluster------------------------------------------------------------------
## Cluster the sequences of the UNMASKED alignment and save the dendrogram.
## Pairwise Hamming distances are computed on the alignment coerced to a
## DNAStringSet, then clustered with single linkage.
sdist <- stringDist(as(origMAlign, "DNAStringSet"), method = "hamming")
clust <- hclust(sdist, method = "single")

## Write the plot to badTree.png. The argument is spelled out in full as
## `filename` rather than relying on partial matching of `file`.
png(filename = "badTree.png")
plot(clust)
dev.off()

## ----figure, echo=FALSE, fig=TRUE, eps=FALSE, fig.align = 'center', fig.cap='Funky tree produced by using unmasked strings'----
## Embed badTree.png, the image file written by the cluster chunk.
knitr::include_graphics("badTree.png")

## ----cluster2-----------------------------------------------------------------
## Repeat the clustering on the GAP-MASKED alignment and save the dendrogram.
## Pairwise Hamming distances are computed on autoMasked coerced to a
## DNAStringSet, then clustered with single linkage. This overwrites the
## `sdist` and `clust` objects created by the cluster chunk.
sdist <- stringDist(as(autoMasked, "DNAStringSet"), method = "hamming")
clust <- hclust(sdist, method = "single")

## Write the plot to goodTree.png. The argument is spelled out in full as
## `filename` rather than relying on partial matching of `file`.
png(filename = "goodTree.png")
plot(clust)
dev.off()

## Cut the tree into four groups and print the group membership.
fourgroups <- cutree(clust, 4)
fourgroups

## ----figure1, echo=FALSE, fig=TRUE, eps=FALSE, width=0.6,fig.align = 'center', height=5, fig.cap='A tree produced by using strings with masked gaps'----
## Embed goodTree.png, the image file written by the cluster2 chunk.
knitr::include_graphics("goodTree.png")

## ----fastaExample, eval=FALSE-------------------------------------------------
# DNAStr = as(origMAlign, "DNAStringSet")
# writeXStringSet(DNAStr, file="myFile.fa")

## ----write.phylip, eval=FALSE-------------------------------------------------
# write.phylip(phylipMAlign, filepath="myFile.txt")

## ----sessionInfo, echo=FALSE--------------------------------------------------
## Print the R session information (R version and attached packages) so
## the run can be reproduced.
sessionInfo()