## ----setup, include=FALSE-------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) ## ----install,message=FALSE------------------------------------------------- BiocManager::install("QSutils") library(QSutils) ## ----load-ex--------------------------------------------------------------- filepath<-system.file("extdata","ToyData_10_50_1000.fna", package="QSutils") lst <- ReadAmplSeqs(filepath,type="DNA") lst ## ----numbhaplo------------------------------------------------------------- length(lst$hseqs) ## ----segsites-------------------------------------------------------------- SegSites(lst$hseqs) ## ----nummutations---------------------------------------------------------- TotalMutations(lst$hseqs) ## ----shannon--------------------------------------------------------------- Shannon(lst$nr) NormShannon(lst$nr) ## ----shannon var----------------------------------------------------------- ShannonVar(lst$nr) NormShannonVar(lst$nr) ## ----ginnisimpsion--------------------------------------------------------- GiniSimpson(lst$nr) GiniSimpsonMVUE(lst$nr) ## ----ginivar--------------------------------------------------------------- GiniSimpsonVar(lst$nr) ## ----hill------------------------------------------------------------------ Hill(lst$nr,q=1) ## ----hillplot, fig.cap="Hill numbers profile", eval=FALSE------------------ # HillProfile(lst$nr) # plot(HillProfile(lst$nr),type ="b", main="Hill numbers obtained # with HillProfile") ## ----hillprofile,echo=FALSE------------------------------------------------ HillProfile(lst$nr) ## ----plothill, fig.cap="Hill numbers profile", echo=FALSE------------------ plot(HillProfile(lst$nr),type ="b", main="Hill numbers obtained with HillProfile") ## ----renyi----------------------------------------------------------------- Renyi(lst$nr,q=3) ## ----reniyprofile,fig.cap="Rényi entropy profile",eval=FALSE--------------- # RenyiProfile(lst$nr) # plot(RenyiProfile(lst$nr),type ="b", main="Rényi entropy obtained with # RenyiProfile") ## ----plotreniy,echo=FALSE-------------------------------------------------- RenyiProfile(lst$nr) ## ----reniyplot,fig.cap="Rényi entropy profile",echo=FALSE------------------ plot(RenyiProfile(lst$nr),type ="b", main="Rényi entropy obtained with RenyiProfile") ## ----hcq------------------------------------------------------------------- HCq(lst$nr, q= 4) ## ----hcqvar---------------------------------------------------------------- HCqVar(lst$nr, q= 4) ## ----hcprofile,fig.cap="Havrda-Charvat entropy profile",eval=FALSE--------- # HCqProfile(lst$nr) # plot(HCqProfile(lst$nr),type ="b", main="Havrda-Charvat entropy obtained # with HCqProfile") ## ----hcqplot,echo=FALSE---------------------------------------------------- HCqProfile(lst$nr) ## ----plothcq,fig.cap="Havrda-Charvat entropy profile",echo=FALSE----------- plot(HCqProfile(lst$nr),type ="b", main="Havrda-Charvat entropy obtained with HCqProfile") ## ----dist ,message=FALSE,fig.cap="Correlation among haplotype distances"---- dst <- DNA.dist(lst$hseqs,model="raw") library(psych) D <- as.matrix(dst) rownames(D) <- sapply(rownames(D),function(str) strsplit(str, split="\\|")[[1]][1]) colnames(D) <- rownames(D) D ## ----mfe------------------------------------------------------------------- MutationFreq(dst) ## ----fad------------------------------------------------------------------- FAD(dst) ## ----pi_e------------------------------------------------------------------ NucleotideDiversity(dst) ## ----mfm------------------------------------------------------------------- nm <- nmismatch(pairwiseAlignment(lst$hseqs,lst$hseqs[1])) MutationFreq(nm=nm,nr=lst$nr,len=width(lst$hseqs)[1]) ## ----mfvar----------------------------------------------------------------- MutationFreqVar(nm,lst$nr,len=width(lst$hseqs)[1]) ## ----pi-------------------------------------------------------------------- NucleotideDiversity(dst,lst$nr) ## ----rao------------------------------------------------------------------- RaoPow(dst,4,lst$nr) ## ----raovar---------------------------------------------------------------- RaoVar(dst,lst$nr) ## ----raoprofile,fig.cap="Havrda-Charvat entropy profile",eval=FALSE-------- # RaoPowProfile(dst,lst$nr) # plot(RaoPowProfile(dst,lst$nr),type ="b", main="Rao entropy obtained # with RaoPowProfile") ## ----raoplot,echo=FALSE---------------------------------------------------- RaoPowProfile(dst,lst$nr) ## ----plotrao,fig.cap="Rao entropy profile",echo=FALSE---------------------- plot(RaoPowProfile(dst,lst$nr),type ="b", main="Rao entropy obtained with RaoPowProfile") ## ----downsampling,fig.cap="Diversity index variations due to sample size"---- set.seed(123) n <- 2000 y <- geom.series(n,0.8)+geom.series(n,0.00025) nr.pop <- round(y*1e7) thr <- 0.1 sz1 <- 5000 sz2 <- 10000 qs.sample <- function(nr.pop,sz1,sz2){ div <- numeric(6) nr.sz1 <- table(sample(length(nr.pop),size=sz1,replace=TRUE,p=nr.pop)) div[1] <- Shannon(nr.sz1) nr.sz1 <- nr.sz1[nr.sz1>=sz1*thr/100] div[3] <- Shannon(nr.sz1) div[5] <- Shannon(nr.sz1[DSFT(nr.sz1,sz1)]) nr.sz2 <- table(sample(length(nr.pop),size=sz2,replace=TRUE,p=nr.pop)) div[2] <- Shannon(nr.sz2) nr.sz2 <- nr.sz2[nr.sz2>=sz2*thr/100] div[4] <- Shannon(nr.sz2) div[6] <- Shannon(nr.sz2[DSFT(nr.sz2,sz1)]) div } hpl.sim <- replicate(2000,qs.sample(nr.pop,sz1,sz2)) nms <- paste(c(sz1,sz2)) par(mfrow=c(1,3)) boxplot(t(hpl.sim[1:2,]),names=nms,col="lavender",las=2, ylab="Shannon entropy",main="raw") boxplot(t(hpl.sim[3:4,]),names=nms,col="lavender",las=2, ylab="Shannon entropy",main="filt") boxplot(t(hpl.sim[5:6,]),names=nms,col="lavender",las=2, ylab="Shannon entropy",main="DSFT") ## ----ex-dsft--------------------------------------------------------------- set.seed(123) n <- 2000 y <- geom.series(n,0.8)+geom.series(n,0.0002) nr.pop <- round(y*1e7) rare <- nr.pop/sum(nr.pop) < 0.01 RHL <- sum(nr.pop[rare])/sum(nr.pop) round(RHL,4) ## ----ex2-dsft-------------------------------------------------------------- thr <- 0.1 sz1 <- 5000 sz2 <- 10000 qs.sample <- function(nr.pop,sz1,sz2){ div <- numeric(2) nr.sz1 <- table(sample(length(nr.pop),size=sz1,replace=TRUE,p=nr.pop)) rare <- nr.sz1/sum(nr.sz1) < 0.01 div[1] <- sum(nr.sz1[rare])/sum(nr.sz1) nr.sz2 <- table(sample(length(nr.pop),size=sz2,replace=TRUE,p=nr.pop)) rare <- nr.sz1/sum(nr.sz2) < 0.01 div[2] <- sum(nr.sz2[rare])/sum(nr.sz2) div } hpl.sim <- replicate(2000,qs.sample(nr.pop,sz1,sz2)) nms <- paste(c(sz1,sz2)) boxplot(t(hpl.sim),names=nms,col="lavender",las=2,ylab="RHL") abline(h=RHL,lty=4,col="navy") ## ----echo=FALSE------------------------------------------------------------ sessionInfo()