% \VignetteIndexEntry{IlluminaHowTo Vignette} % \VignetteKeywords{copy number, genotype, SNP} % \VignettePackage{VanillaICE} \documentclass[11pt]{article} \usepackage{amsmath} \usepackage{graphicx} \parskip=.3cm \textwidth=6.2in \textheight=8.5in \oddsidemargin=.1in \evensidemargin=.1in \headheight=-.3in \newcommand{\R}{\textsf{R}} \newcommand{\Rpackage}[1]{\textit{#1}} \newcommand{\Rfunction}[1]{\texttt{#1}} \newcommand{\Robject}[1]{\texttt{#1}} \title{HowTo: Creating HMM objects from BeadStudio-processed Illumina arrays} \author{Robert B. Scharpf} \begin{document} \maketitle <>= options(width=65) @ This vignette describes how to create an instance of an \Robject{RatioSnpSet} from Illumina data. \section{Reading in the data} To illustrate, an example of BeadStudio output obtained from the Pevsner website\\ (\texttt{http://pevsnerlab.kennedykrieger.org/SNPtrio03.htm}) is included with this package. The Pevsner laboratory provide the following instructions for saving the data using Illumina's BeadStudio in the appropriate format: \begin{enumerate} \item select the "Full Data Table" tab \item click on the "Column Chooser" icon \item in the "Displayed Columns" area, keep "Name", "Chr" and "Position", hide the rest \item the "Displayed Subcolumns" area, keep "GType" and "Log R Ratio", hide the rest \item click on "Export Displayed Data to File" icon; finally, save the file \end{enumerate} A subset of 1000 SNPs is included with this package and can be loaded by <>= library(VanillaICE) pathToIlluminaData <- system.file("illumina", package="VanillaICE") illuminaEx <- read.table(paste(pathToIlluminaData, "/illuminaEx.txt", sep=""), sep="\t", as.is=TRUE) @ The following code converts this data.frame to an object of class \Robject{oligoSnpSet}: <>= gt <- illuminaEx[, "S1135.GType", drop=FALSE] gt[gt == "AA"] <- 1 gt[gt == "BB"] <- 3 gt[gt == "AB"] <- 2 gt[gt == "NC"] <- 4 gt <- as.matrix(as.integer(gt[[1]])) logR <- as.matrix(as.numeric(illuminaEx[, "S1135.Log.R.Ratio"])) colnames(gt) <- colnames(logR) <- "S1135" rownames(logR) <- rownames(gt) <- illuminaEx[, "Name"] fd <- new("AnnotatedDataFrame", data=data.frame(position=illuminaEx[, "Position"], chromosome=integer2chromosome(illuminaEx[, "Chr"]), stringsAsFactors=FALSE), varMetadata=data.frame(labelDescription=c("position", "chromosome"))) featureNames(fd) <- illuminaEx[, "Name"] ##callsConfidence <- ratioConfidence <- matrix(NA, nrow=nrow(ratio), ncol=ncol(ratio)) ##rownames(callsConfidence) <- rownames(ratioConfidence) <- rownames(ratio) ##colnames(callsConfidence) <- colnames(ratioConfidence) <- colnames(ratio) ##snpset <- new("RatioSnpSet", ## ratio=ratio, ## ratioConfidence=ratioConfidence, ## calls=gt, ## callsConfidence=callsConfidence, ## featureData=fd, ## ## annotation="Illumina550k") snpset <- new("oligoSnpSet", copyNumber=logR, calls=gt, phenoData=annotatedDataFrameFrom(logR, byrow=FALSE), featureData=fd, annotation="Illumina550k") ##no annotation package as yet chrom <- chromosome2integer(fd$chromosome) snpset <- snpset[order(chrom, fd$position), ] stopifnot(validObject(snpset)) @ We can now use methods from the \R{} package \Rpackage{SNPchip} to plot the data: <<>>= gp <- plotSnp(snpset) gp$cex <- 3 gp$ylab <- "copy number ratio" gp$abline <- TRUE; gp$abline.h <- c(0.5, 1, 3/2); gp$abline.col <- "grey20" gp$abline.lty <- c(2, 1, 2) @ <>= print(gp) @ %Parameters for the HMM are generated by creating an instance of the %\Robject{HmmParameter} class using the method \Rfunction{new}: <>= featureData(snpset)$arm <- getChromosomeArm(snpset) options <- new("HmmOptions", snpset=snpset, states=c("D", "N", "L", "A"), copyNumber.location=c(1, 2, 2, 3), probHomCall=c(0.99, 0.75, 0.99, 0.75)) params <- new("HmmParameter", states=states(options)) emission(params) <- copyNumber.emission(options) genomicDistance(params) <- exp(-2*diff(position(snpset))/(100*1e6)) transitionScale(params) <- scaleTransitionProbability(states(options)) @ %To fit the HMM, <>= fit <- hmm(options, params) @ %%Here we visualize the data as well as the predicted states (in this %%example, the entire region is normal): <>= gp <- plotSnp(options@snpset, fit) gp$add.cytoband <- FALSE gp$ylab <- "copy number ratio" gp$abline <- TRUE; gp$abline.h <- c(0.5, 1, 3/2); gp$abline.col <- "grey20" gp$col.predict[states(fit) == "N"] <- "grey30" gp$abline.lty <- c(2, 1, 2) gp$hmm.ycoords <- c(0.4, 0.45) gp$ylim <- c(0.2, 2) @ <>= print(gp) legend(0, 0.35, title="predicted states", fill="grey30", legend="ratio is 1", bty="n") @ \section*{Session Information} The version number of R and packages loaded for generating the vignette were: <>= toLatex(sessionInfo()) @ \end{document}