# Walkthrough script (knitr chunks) for the fgga package on the CfData example:
# inspect the data, build the GO graph, train one SVM per GO-term, run the
# FGGA message passing on a test subset, and score the predictions with
# PerfMeas.

## ----settings, include = FALSE--------------------------------------------------------------------
#library(knitr)
#opts_chunk$set(warning=TRUE, message = FALSE, cache = FALSE, tidy = FALSE, tidy.opts = list(width.cutoff = 60))
options(width = 100)
knitr::opts_chunk$set(
  collapse = TRUE, comment = "#>", class.source = "whiteCode"
)

## ---- message = FALSE, eval = FALSE---------------------------------------------------------------
# ## From Bioconductor repository
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#     install.packages("BiocManager")
# }
# BiocManager::install("fgga")
#
# ## Or from GitHub repository using devtools
# BiocManager::install("devtools")
# devtools::install_github("fspetale/fgga")

## ----setup, eval = TRUE, message=FALSE------------------------------------------------------------
library(fgga)

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
# Loading Canis lupus familiaris dataset and example R objects
data(CfData)

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
# To see the summarized experiment object
summary(CfData)

# To see the information of characterized data
dim(CfData$dxCf)

colnames(CfData$dxCf)[1:20]

rownames(CfData$dxCf)[1:10]

head.matrix(CfData$dxCf[, 51:61], n = 10)

# To see the information of GO data
dim(CfData$tableCfGO)

colnames(CfData$tableCfGO)[1:10]

rownames(CfData$tableCfGO)[1:10]

head(CfData$tableCfGO)[, 1:8]

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
# Checking the amount of annotations by GO-term (column-wise sums)
apply(CfData$tableCfGO, MARGIN = 2, sum)

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
library(GO.db)
library(GOstats)

mygraph <- GOGraph(CfData$nodesGO, GOMFPARENTS)

# Delete root node called all
mygraph <- subGraph(CfData$nodesGO, mygraph)

# We adapt the graph to the format used by FGGA: transpose the adjacency
# matrix, then convert it back to a graphNEL object
mygraph <- t(as(mygraph, "matrix"))
mygraphGO <- as(mygraph, "graphNEL")

# We search the root GO-term
rootGO <- leaves(mygraphGO, "in")

rootGO

plot(mygraphGO)

## ---- message = FALSE, eval = FALSE---------------------------------------------------------------
# # We add GO-terms corresponding to Cellular Component subdomain
# myGOs <- c(CfData[['nodesGO']], "GO:1902494", "GO:0032991", "GO:1990234",
#             "GO:0005575")
#
# # We build a graph respecting the GO constraints of inference to MF and CC subdomains
# mygraphGO <- preCoreFG(myGOs, domains="MF-CC")
#
# plot(mygraphGO)

## ----message=FALSE, include=FALSE, results='hide'-------------------------------------------------
# Hidden chunk: overwrite mygraphGO / rootGO using the graph stored in
# CfData[["graphCfGO"]], so the chunks below use that graph
mygraphGO <- as(CfData[["graphCfGO"]], "graphNEL")

rootGO <- leaves(mygraphGO, "in")

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
# Result is passed to msgFGGA() as `matrixFGGA` further down
modelFGGA <- fgga2bipartite(mygraphGO)

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
# We take a subset of Cf data to train our model
idsTrain <- CfData$indexGO[["indexTrain"]][1:750]

# We build our model of binary SVM classifiers (one svmTrain() call per
# element of CfData[["nodesGO"]])
modelSVMs <- lapply(
  CfData[["nodesGO"]],
  FUN = svmTrain,
  tableGOs = CfData[["tableCfGO"]][idsTrain, ],
  dxCharacterized = CfData[["dxCf"]][idsTrain, ],
  graphGO = mygraphGO,
  kernelSVM = "radial"
)

## ---- message = FALSE, eval = FALSE---------------------------------------------------------------
# # We calculate the reliability of each GO-term
# varianceGOs <- varianceGO(tableGOs = CfData[["tableCfGO"]][idsTrain, ],
#                         dxCharacterized = CfData[["dxCf"]][idsTrain, ],
#                         kFold = 5, graphGO = mygraphGO, rootNode = rootGO,
#                         kernelSVM = "radial")
#
# varianceGOs

## ----echo=FALSE, message=FALSE--------------------------------------------------------------------
# The varianceGO() call above is not evaluated; use the values stored in CfData
CfData[["varianceGOs"]]
varianceGOs <- CfData[["varianceGOs"]]

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
# First 50 test samples
dxTestCharacterized <- CfData[["dxCf"]][CfData$indexGO[["indexTest"]][1:50], ]

# `svmMoldel` is spelled as in the original call; do not "correct" it here
# without checking the svmGO() signature
matrixGOTest <- svmGO(
  svmMoldel = modelSVMs,
  dxCharacterized = dxTestCharacterized,
  rootNode = rootGO,
  varianceSVM = varianceGOs
)

head(matrixGOTest)[, 1:8]

## ----message = FALSE, eval = TRUE-----------------------------------------------------------------
# Run msgFGGA() on every row (sample) of the SVM output; apply() returns the
# per-row results as columns, hence the transpose
matrixFGGATest <- t(apply(
  matrixGOTest,
  MARGIN = 1,
  FUN = msgFGGA,
  matrixFGGA = modelFGGA,
  graphGO = mygraphGO,
  tmax = 50,
  epsilon = 0.001
))

head(matrixFGGATest)[, 1:8]

## ---- message = FALSE, eval = FALSE---------------------------------------------------------------
#        HP        HR        HF   samples
# 0.5949843 0.7047748 0.6178593      2399

## ---- message = FALSE, eval = TRUE----------------------------------------------------------------
library(PerfMeas)

# Binarize the predictions element-wise at the 0.5 threshold. Assigning through
# `[]` keeps the matrix shape and dimnames, which the rownames() lookups below
# rely on. A per-row which() yields column positions, so it must never be used
# as a row index (that would overwrite whole rows).
matrixFGGATest[] <- as.numeric(matrixFGGATest >= 0.5)

# NOTE(review): AUC and precision-at-recall below are computed on the binarized
# matrix, not on the raw scores -- confirm this is intended.

# Computing F-score
Fs <- F.measure.single.over.classes(
  CfData$tableCfGO[rownames(matrixFGGATest), ],
  matrixFGGATest
)

# Average F-score
Fs$average[4]

# Computing AUC
auc <- AUC.single.over.classes(
  CfData$tableCfGO[rownames(matrixFGGATest), ],
  matrixFGGATest
)

# Average AUC
auc$average

# Computing precision at different recall levels
PXR <- precision.at.multiple.recall.level.over.classes(
  CfData$tableCfGO[rownames(matrixFGGATest), ],
  matrixFGGATest,
  seq(from = 0.1, to = 1, by = 0.1)
)

# Average PxR
PXR$avgPXR