## ----setup, include=FALSE------------------------------------------------ knitr::opts_chunk$set(echo = TRUE, message = FALSE) ## ---- eval=FALSE--------------------------------------------------------- # geno <- MatrixGenotypeReader(genotype = genotype, snpID = snpID, chromosome = chromosome, # position = position, scanID = scanID) # genoData <- GenotypeData(geno) ## ---- eval=FALSE--------------------------------------------------------- # geno <- GdsGenotypeReader(filename = "genotype.gds") # genoData <- GenotypeData(geno) ## ---- eval=FALSE--------------------------------------------------------- # snpgdsBED2GDS(bed.fn = "genotype.bed", bim.fn = "genotype.bim", fam.fn = "genotype.fam", # out.gdsfn = "genotype.gds") ## ---- echo=FALSE--------------------------------------------------------- library(GENESIS) library(GWASTools) ## ----]------------------------------------------------------------------- # read in GDS data gdsfile <- system.file("extdata", "HapMap_ASW_MXL_geno.gds", package="GENESIS") HapMap_geno <- GdsGenotypeReader(filename = gdsfile) # create a GenotypeData class object HapMap_genoData <- GenotypeData(HapMap_geno) HapMap_genoData ## ------------------------------------------------------------------------ # create matrix of KING estimates KINGmat <- kingToMatrix(c(system.file("extdata", "MXL_ASW.kin0", package="GENESIS"), system.file("extdata", "MXL_ASW.kin", package="GENESIS"))) KINGmat[1:5,1:5] ## ------------------------------------------------------------------------ # run PC-AiR mypcair <- pcair(HapMap_genoData, kinobj = KINGmat, divobj = KINGmat) ## ---- echo=FALSE, eval=FALSE--------------------------------------------- # #kinobj and divobj are now required arguments # # As PCA is an unsupervised method, it is often difficult to identify what population structure each of the top PCs represents. In order to provide some understanding of the inferred structure, it is sometimes recommended to include reference population samples of known ancestry in the analysis. If the data set contains such reference population samples, we may want to use only those individuals as the "unrelated subset" for performing the PCA and predict values for all other sample individuals. This can be accomplished by setting the input `unrel.set` equal to a vector, `IDs`, of the individual IDs for the reference population samples. # mypcair <- pcair(HapMap_genoData, unrel.set = IDs) ## ---- eval=FALSE--------------------------------------------------------- # mypcair <- pcair(HapMap_genoData, kinobj = KINGmat, divobj = KINGmat, unrel.set = IDs) ## ------------------------------------------------------------------------ part <- pcairPartition(kinobj = KINGmat, divobj = KINGmat) ## ------------------------------------------------------------------------ head(part$unrels); head(part$rels) ## ------------------------------------------------------------------------ summary(mypcair) ## ---- fig.show='hold', fig.width=3.4, fig.height=3.4, dev.args=list(pointsize = 10, bg='white')---- # plot top 2 PCs plot(mypcair) # plot PCs 3 and 4 plot(mypcair, vx = 3, vy = 4) ## ------------------------------------------------------------------------ # run PC-Relate HapMap_genoData <- GenotypeBlockIterator(HapMap_genoData, snpBlock=20000) mypcrelate <- pcrelate(HapMap_genoData, pcs = mypcair$vectors[,1:2], training.set = mypcair$unrels) ## ------------------------------------------------------------------------ plot(mypcrelate$kinBtwn$k0, mypcrelate$kinBtwn$kin, xlab="k0", ylab="kinship") ## ------------------------------------------------------------------------ iids <- as.character(getScanID(HapMap_genoData)) pcrelateToMatrix(mypcrelate, sample.include = iids[1:5], thresh = 2^(-11/2), scaleKin = 2) ## ---- echo=FALSE--------------------------------------------------------- close(HapMap_genoData)