## ----setup, include=FALSE------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, message = FALSE)

## ---- eval=FALSE---------------------------------------------------------
#  geno <- MatrixGenotypeReader(genotype = genotype, snpID = snpID, chromosome = chromosome,
#                               position = position, scanID = scanID)
#  genoData <- GenotypeData(geno)

## ---- eval=FALSE---------------------------------------------------------
#  geno <- GdsGenotypeReader(filename = "genotype.gds")
#  genoData <- GenotypeData(geno)

## ---- eval=FALSE---------------------------------------------------------
#  snpgdsBED2GDS(bed.fn = "genotype.bed", bim.fn = "genotype.bim", fam.fn = "genotype.fam",
#                out.gdsfn = "genotype.gds")

## ---- echo=FALSE---------------------------------------------------------
library(GENESIS)
library(GWASTools)

## ----]-------------------------------------------------------------------
# read in GDS data
gdsfile <- system.file("extdata", "HapMap_ASW_MXL_geno.gds", package="GENESIS")
HapMap_geno <- GdsGenotypeReader(filename = gdsfile)
# create a GenotypeData class object
HapMap_genoData <- GenotypeData(HapMap_geno)
HapMap_genoData

## ------------------------------------------------------------------------
# create matrix of KING estimates
KINGmat <- kingToMatrix(c(system.file("extdata", "MXL_ASW.kin0", package="GENESIS"), 
    			  system.file("extdata", "MXL_ASW.kin", package="GENESIS")))
KINGmat[1:5,1:5]

## ------------------------------------------------------------------------
# run PC-AiR
mypcair <- pcair(HapMap_genoData, kinobj = KINGmat, divobj = KINGmat)

## ---- echo=FALSE, eval=FALSE---------------------------------------------
#  #kinobj and divobj are now required arguments
#  # As PCA is an unsupervised method, it is often difficult to identify what population structure each of the top PCs represents.  In order to provide some understanding of the inferred structure, it is sometimes recommended to include reference population samples of known ancestry in the analysis.  If the data set contains such reference population samples, we may want to use only those individuals as the "unrelated subset" for performing the PCA and predict values for all other sample individuals.  This can be accomplished by setting the input `unrel.set` equal to a vector, `IDs`, of the individual IDs for the reference population samples.
#  mypcair <- pcair(HapMap_genoData, unrel.set = IDs)

## ---- eval=FALSE---------------------------------------------------------
#  mypcair <- pcair(HapMap_genoData, kinobj = KINGmat, divobj = KINGmat, unrel.set = IDs)

## ------------------------------------------------------------------------
part <- pcairPartition(kinobj = KINGmat, divobj = KINGmat)

## ------------------------------------------------------------------------
head(part$unrels); head(part$rels)

## ------------------------------------------------------------------------
summary(mypcair)

## ---- fig.show='hold', fig.width=3.4, fig.height=3.4, dev.args=list(pointsize = 10, bg='white')----
# plot top 2 PCs
plot(mypcair)
# plot PCs 3 and 4
plot(mypcair, vx = 3, vy = 4)

## ------------------------------------------------------------------------
# run PC-Relate
HapMap_genoData <- GenotypeBlockIterator(HapMap_genoData, snpBlock=20000) 
mypcrelate <- pcrelate(HapMap_genoData, pcs = mypcair$vectors[,1:2], 
                       training.set = mypcair$unrels)

## ------------------------------------------------------------------------
plot(mypcrelate$kinBtwn$k0, mypcrelate$kinBtwn$kin, xlab="k0", ylab="kinship")

## ------------------------------------------------------------------------
iids <- as.character(getScanID(HapMap_genoData))
pcrelateToMatrix(mypcrelate, sample.include = iids[1:5], thresh = 2^(-11/2), scaleKin = 2)

## ---- echo=FALSE---------------------------------------------------------
close(HapMap_genoData)