## ----include = FALSE----------------------------------------------------------
# Global knitr chunk options: collapse source and output into one block and
# prefix printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
# if (!require("BiocManager", quietly = TRUE)){
#   install.packages("BiocManager")
# }
# BiocManager::install("BERT")

## ----eval=FALSE---------------------------------------------------------------
# devtools::install_github("HSU-HPC/BERT")

## ----eval=FALSE---------------------------------------------------------------
# if (!require("BiocManager", quietly = TRUE)){
#   install.packages("BiocManager")
# }
# BiocManager::install("sva")
# BiocManager::install("limma")

## -----------------------------------------------------------------------------
# Minimal example of the data.frame input layout: one row per sample, one
# column per feature, plus a "Batch" column.
example <- data.frame(
  feature_1 = stats::rnorm(5),
  feature_2 = stats::rnorm(5),
  Batch = c(1, 1, 2, 2, 2)
)
example

## -----------------------------------------------------------------------------
nrows <- 200
ncols <- 8
expr_values <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
# colData also takes all other metadata information, such as Label, Sample,
# covariates etc.
colData <- data.frame(
  Batch = c(1, 1, 1, 1, 2, 2, 2, 2),
  Reference = c(1, 1, 0, 0, 1, 1, 0, 0)
)
dataset_raw <- SummarizedExperiment::SummarizedExperiment(
  assays = list(expr = expr_values),
  colData = colData
)

## -----------------------------------------------------------------------------
library(BERT)
# generate test data with 10% missing values as provided by the BERT library
dataset_raw <- generate_dataset(
  features = 60,
  batches = 10,
  samplesperbatch = 10,
  mvstmt = 0.1,
  classes = 2
)
# apply BERT
dataset_adjusted <- BERT(dataset_raw)

## ----eval=FALSE---------------------------------------------------------------
# logging::setLevel("WARN") # set level to warn and upwards
# result <- BERT(data,cores = 1) # BERT executes silently

## -----------------------------------------------------------------------------
# import BERT
library(BERT)
# generate data with 20 batches, 60 features, 15 samples per batch, 15% missing
# values and 2 classes
dataset_raw <- generate_dataset(
  features = 60,
  batches = 20,
  samplesperbatch = 15,
  mvstmt = 0.15,
  classes = 2
)
# run BERT with method = "limma"
dataset_adjusted <- BERT(dataset_raw, method = "limma")

## -----------------------------------------------------------------------------
# import BERT
library(BERT)
# generate data with 20 batches, 60 features, 15 samples per batch, 15% missing
# values and 2 classes
dataset_raw <- generate_dataset(
  features = 60,
  batches = 20,
  samplesperbatch = 15,
  mvstmt = 0.15,
  classes = 2
)
# run BERT with cores = 2
dataset_adjusted <- BERT(dataset_raw, cores = 2)

## -----------------------------------------------------------------------------
nrows <- 200
ncols <- 8
# SummarizedExperiments store samples in columns and features in rows (in
# contrast to BERT). BERT will automatically account for this.
expr_values <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
# colData also takes further metadata information, such as Label, Sample,
# Reference or covariates
colData <- data.frame(
  "Batch" = c(1, 1, 1, 1, 2, 2, 2, 2),
  "Label" = c(1, 2, 1, 2, 1, 2, 1, 2),
  "Sample" = c(1, 2, 3, 4, 5, 6, 7, 8)
)
dataset_raw <- SummarizedExperiment::SummarizedExperiment(
  assays = list(expr = expr_values),
  colData = colData
)
dataset_adjusted <- BERT(dataset_raw, assayname = "expr")

## -----------------------------------------------------------------------------
# import BERT
library(BERT)
# set seed for reproducibility
set.seed(1)
# generate data with 5 batches, 60 features, 30 samples per batch, 15% missing
# values and 2 classes
dataset_raw <- generate_dataset(
  features = 60,
  batches = 5,
  samplesperbatch = 30,
  mvstmt = 0.15,
  classes = 2
)
# create covariable column with 2 possible values, e.g. male/female condition
dataset_raw["Cov_1"] <- sample(
  c(1, 2),
  size = nrow(dataset_raw),
  replace = TRUE
)
# BERT
dataset_adjusted <- BERT(dataset_raw)

## -----------------------------------------------------------------------------
# import BERT
library(BERT)
# generate data with 4 batches, 6 features, 15 samples per batch, 15% missing
# values and 2 classes
dataset_raw <- generate_dataset(
  features = 6,
  batches = 4,
  samplesperbatch = 15,
  mvstmt = 0.15,
  classes = 2
)
# create reference column with default value 0. The 0 indicates that the
# respective sample should be co-adjusted only.
dataset_raw[, "Reference"] <- 0
# randomly select 2 references per batch and class - in practice, this choice
# will be determined by external requirements (e.g. class known for only these
# samples)
batches <- unique(dataset_raw$Batch) # all the batches
for (b in batches) { # iterate over all batches
  # class 1 first, then class 2; the Reference value is the class label
  for (cls in c(1, 2)) {
    candidates <- which((dataset_raw$Batch == b) & (dataset_raw$Label == cls))
    # Index via sample.int() rather than sample(candidates, ...): sample()
    # treats a single number x as 1:x, which would silently pick unrelated
    # rows if only one candidate existed. With two or more candidates the
    # draws are identical; with fewer, sample.int() raises an error.
    ref_idx <- candidates[
      sample.int(length(candidates), size = 2, replace = FALSE)
    ]
    dataset_raw[ref_idx, "Reference"] <- cls
  }
}
# BERT
dataset_adjusted <- BERT(dataset_raw, method = "ref")

## -----------------------------------------------------------------------------
sessionInfo()