### R code from vignette source 'vignettes/PADOG/inst/doc/PADOG.Rnw' ################################################### ### code chunk number 1: fig1 ################################################### library(KEGGdzPathwaysGEO) library(PADOG) set.seed(1) set="GSE9348" data(list=set,package="KEGGdzPathwaysGEO") x=get(set) #Extract from the dataset the required info exp=experimentData(x); dataset= exp@name dat.m=exprs(x) ano=pData(x) design= notes(exp)$design annotation= paste(x@annotation,".db",sep="") targetGeneSets= notes(exp)$targetGeneSets #run padog function on KEGG pathways #use NI=1000 for accurate results myr=padog( esetm=dat.m, group=ano$Group, paired=design=="Paired", block=ano$Block, targetgs=targetGeneSets, annotation=annotation, gslist="KEGG.db", organism="hsa", verbose=FALSE, Nmin=3, NI=50, plots=TRUE) myr[1:15,-c(4,5)] ################################################### ### code chunk number 2: PADOG.Rnw:169-225 ################################################### randomF=function(set,mygslist,minsize){ set.seed(1) #this loads the dataset in an ExpressionSet object called x data(list=set,package="KEGGdzPathwaysGEO") x=get(set) #Extract from the dataset the required info to be passed to padog exp=experimentData(x); dat.m=exprs(x) ano=pData(x) dataset= exp@name design= notes(exp)$design annotation= paste(x@annotation,".db",sep="") targetGeneSets= notes(exp)$targetGeneSets #get rid of duplicates probesets per ENTREZ ID by keeping the probeset #with smallest p-value (computed using limma) aT1=filteranot(esetm=dat.m,group=ano$Group,paired=(design=="Paired"), block=ano$Block,annotation=annotation) #create an output dataframe for this toy method with random gene set p-values mygslistSize=unlist(lapply(mygslist,function(x){length(intersect(aT1$ENTREZID,x))})) res=data.frame(ID=names(mygslist),P=runif(length(mygslist)), Size=mygslistSize,stringsAsFactors=FALSE) res$FDR=p.adjust(res$P,"fdr") #drop genesets with less than minsize genes in the current dataset res=res[res$Size>=minsize,] #compute ranks res$Rank=rank(res$P)/dim(res)[1]*100 #needed to compare ranks between methods; must be the same as given #in mymethods argument "list(myRand=" res$Method="myRand"; #needed because comparisons of ranks between methods is paired at dataset level res$Dataset<-dataset; #output only result for the targetGeneSets #which are gene sets expected to be relevant in this dataset return(res[res$ID %in% targetGeneSets,]) } #run the analysis on all 24 datasets and compare the new method "myRand" with #PADOG and GSA (if installed) (chosen as reference since is listed first in the existingMethods) #if the package parallel is installed datasets are analyzed in parallel. #out=compPADOG(datasets=NULL,existingMethods=c("GSA","PADOG"), #mymethods=list(myRand=randomF), #gslist="KEGG.db",Nmin=3,NI=1000,plots=TRUE,verbose=FALSE) #compare myRand against PADOG on 4 datasets only #mysets=data(package="KEGGdzPathwaysGEO")$results[,"Item"] mysets=c("GSE9348","GSE8671","GSE1297") out=compPADOG(datasets=mysets,existingMethods=c("PADOG"), mymethods=list(myRand=randomF), gslist="KEGG.db",Nmin=3,NI=40,plots=TRUE,verbose=FALSE) print(out)