## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ----global_options, include=FALSE----------------------------------------------------------------
knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, fig.width=8, 
fig.height=8)
options(width=100) 

## ----eval=FALSE-----------------------------------------------------------------------------------
#  install.packages("devtools")
#  library("devtools")
#  install_github("transbioZI/BioMM")

## ----eval=FALSE-----------------------------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  # The following initializes usage of Bioc devel
#  BiocManager::install(version='devel')
#  BiocManager::install("BioMM")

## ----loadPkg, eval=TRUE, results="hide"-----------------------------------------------------------
library(BioMM)
library(BiocParallel)
library(parallel)
library(ranger)
library(rms)
library(glmnet)
library(e1071)
library(pROC)
library(vioplot)
library(variancePartition)
library(CMplot)

## ----studyData, eval=TRUE-------------------------------------------------------------------------
## Get DNA methylation data 
studyData <- readRDS(system.file("extdata", "/methylData.rds", 
                     package="BioMM"))
head(studyData[,1:5])
dim(studyData)

## ----annotationFile, eval=TRUE--------------------------------------------------------------------
## Load annotation data
featureAnno <- readRDS(system.file("extdata", "cpgAnno.rds", package="BioMM")) 
pathlistDB <- readRDS(system.file("extdata", "goDB.rds", package="BioMM")) 
head(featureAnno)
str(pathlistDB[1:3])

## ----pathlist, eval=TRUE--------------------------------------------------------------------------
## Map to pathways (only 100 pathways in order to reduce the runtime)
pathlistDBsub <- pathlistDB[1:100]
pathlist <- omics2pathlist(data=studyData, pathlistDBsub, featureAnno, 
                           restrictUp=100, restrictDown=20, minPathSize=10) 

## ----BioMMrandForest, eval=TRUE-------------------------------------------------------------------
## Parameters
supervisedStage1=TRUE
classifier <- "randForest"
predMode <- "classification"
paramlist <- list(ntree=300, nthreads=10)   
param1 <- MulticoreParam(workers = 1)
param2 <- MulticoreParam(workers = 10)
## If BioMM is installed from Github, please use the following assignment:
## param1 <- 1
## param2 <- 10

studyDataSub <- studyData[,1:2000] ## to reduce the runtime
set.seed(123)
result <- BioMM(trainData=studyDataSub, testData=NULL, pathlistDB, featureAnno, 
                restrictUp=100, restrictDown=10, minPathSize=10, 
                supervisedStage1, typePCA="regular", 
                resample1="BS", resample2="CV", dataMode="allTrain",
                repeatA1=50, repeatA2=1, repeatB1=20, repeatB2=1, 
                nfolds=10, FSmethod1=NULL, FSmethod2=NULL, 
                cutP1=0.1, cutP2=0.1, fdr2=NULL, FScore=param1, 
                classifier, predMode, 
                paramlist, innerCore=param2)
print(result)

## ----BioMMpara, eval=TRUE-------------------------------------------------------------------------
## SVM 
supervisedStage1=TRUE
classifier <- "SVM"
predMode <- "classification"
paramlist <- list(tuneP=FALSE, kernel="radial", 
                  gamma=10^(-3:-1), cost=10^(-3:1))

## GLM with elastic-net
supervisedStage1=TRUE
classifier <- "glmnet"
predMode <- "classification" 
paramlist <- list(family="binomial", alpha=0.5, 
                   ypeMeasure="mse", typePred="class")

## PCA + random forest
supervisedStage1=FALSE
classifier <- "randForest"
predMode <- "classification"
paramlist <- list(ntree=300, nthreads=10)  

## ----BioMMpathway, eval=TRUE----------------------------------------------------------------------
## Parameters
supervisedStage1=FALSE
classifier <- "randForest"
predMode <- "classification"
paramlist <- list(ntree=300, nthreads=10)   
param1 <- MulticoreParam(workers = 1)
param2 <- MulticoreParam(workers = 10)
## If BioMM is installed from Github, please use the following assignment:
## param1 <- 1
## param2 <- 10

set.seed(123)
result <- BioMM(trainData=studyData, testData=NULL, 
                pathlistDBsub, featureAnno, 
                restrictUp=100, restrictDown=10, minPathSize=10, 
                supervisedStage1, typePCA="regular", 
                resample1="BS", resample2="CV", dataMode="allTrain",
                repeatA1=40, repeatA2=1, repeatB1=40, repeatB2=1, 
                nfolds=10, FSmethod1=NULL, FSmethod2=NULL, 
                cutP1=0.1, cutP2=0.1, fdr2=NULL, FScore=param1, 
                classifier, predMode, paramlist, innerCore=param2)
print(result)

## ----stage2data, eval=TRUE------------------------------------------------------------------------
## Pathway level data or stage-2 data prepared by reconBySupervised()
stage2dataA <- readRDS(system.file("extdata", "/stage2dataA.rds", 
                       package="BioMM"))

head(stage2dataA[,1:5])
dim(stage2dataA)

## ----stage2dataAprepare, eval=FALSE---------------------------------------------------------------
#  #### Alternatively, 'stage2dataA' can be created by the following code:
#  ## Parameters
#  classifier <- "randForest"
#  predMode <- "probability"
#  paramlist <- list(ntree=300, nthreads=40)
#  param1 <- MulticoreParam(workers = 1)
#  param2 <- MulticoreParam(workers = 10)
#  ## If BioMM is installed from Github, please use the following assignment:
#  ## param1 <- 1
#  ## param2 <- 10
#  set.seed(123)
#  ## This will take a bit longer to run
#  stage2dataA <- reconBySupervised(trainDataList=pathlist, testDataList=NULL,
#                              resample="BS", dataMode="allTrain",
#                              repeatA=25, repeatB=1, nfolds=10,
#                              FSmethod=NULL, cutP=0.1, fdr=NULL, FScore=param1,
#                              classifier, predMode, paramlist,
#                              innerCore=param2, outFileA=NULL, outFileB=NULL)
#  

## ----stage2dataViz, eval=TRUE---------------------------------------------------------------------
param <- MulticoreParam(workers = 1) 
## If BioMM is installed from Github, please use the following assignment:
## param <- 1 
plotVarExplained(data=stage2dataA, posF=TRUE, 
                 core=param, horizontal=FALSE, fileName=NULL)

## ----topFstage2data, eval=TRUE--------------------------------------------------------------------
param <- MulticoreParam(workers = 10) 
## If BioMM is installed from Github, please use the following assignment:
## param <- 1 
topPath <- plotRankedFeature(data=stage2dataA, 
                             posF=TRUE, topF=10, 
                             blocklist=pathlist,
                             rankMetric="R2", 
                             colorMetric="size", 
                             core=param, fileName=NULL)

## ----cirPlot, eval=TRUE---------------------------------------------------------------------------
core <- MulticoreParam(workers = 10)  
## If BioMM is installed from Github, use the following assignment:
## core <- 10
pathID <- gsub("\\.", ":", rownames(topPath))
pathSet <- pathlist[is.element(names(pathlist), pathID)]
pathMatch <- pathSet[match(pathID, names(pathSet))]

cirPlot4pathway(datalist=pathMatch, 
                topPathID=names(pathMatch), core, fileName=NULL)



## ----sessioninfo, eval=TRUE-----------------------------------------------------------------------
sessionInfo()