## ----setup, include = FALSE---------------------------------------------------
library(CDI)

## ----eval = FALSE-------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("CDI")

## ----eval = FALSE-------------------------------------------------------------
#  if (!requireNamespace("remotes", quietly = TRUE))
#      install.packages("remotes")
#  remotes::install_github("jichunxie/CDI", build_vignettes = TRUE)

## ----data_reading-------------------------------------------------------------
# Load the one-batch example matrix shipped with CDI and show its dimensions.
data("one_batch_matrix", package = "CDI")
dim(one_batch_matrix)

# Load the matching cell-type labels and count the cells per label.
data("one_batch_matrix_celltype", package = "CDI")
table(one_batch_matrix_celltype)

## ----data_label---------------------------------------------------------------
# Load the candidate cluster labels and preview the first three rows of four
# of the label columns.
data("one_batch_matrix_label_df", package = "CDI")
knitr::kable(
  head(
    one_batch_matrix_label_df[, c(
      "KMeans_k2", "KMeans_k4", "Seurat_k2", "Seurat_k3"
    )],
    n = 3
  )
)

## ----feature_selection--------------------------------------------------------
# Pick 500 feature rows of the one-batch matrix with the "wds" method.
# batch_label is NULL for the one-batch data.
feature_gene_indx <- feature_gene_selection(
  X = one_batch_matrix,
  batch_label = NULL,
  method = "wds",
  nfeature = 500
)
# Keep only the selected rows for the CDI computation.
sub_one_batch_matrix <- one_batch_matrix[feature_gene_indx, ]

## -----------------------------------------------------------------------------
# The size factor is computed from the full matrix, not the feature subset.
one_batch_matrix_size_factor <- size_factor(X = one_batch_matrix)

## ----calculate_cdi1-----------------------------------------------------------
# Compute CDI for every candidate label set on the feature subset and time
# the call.
start_time <- Sys.time()
CDI_return1 <- calculate_CDI(
  X = sub_one_batch_matrix,
  cand_lab_df = one_batch_matrix_label_df,
  batch_label = NULL,
  cell_size_factor = one_batch_matrix_size_factor
)
end_time <- Sys.time()
# Report the elapsed wall-clock time of the computation.
print(difftime(time1 = end_time, time2 = start_time))

## -----------------------------------------------------------------------------
# Render the one-batch CDI results as a table.
knitr::kable(x = CDI_return1)

## -----------------------------------------------------------------------------
# Draw the line plot of the one-batch CDI results using the "CDI_BIC" type.
CDI_lineplot(
  cdi_dataframe = CDI_return1,
  cdi_type = "CDI_BIC"
)

## -----------------------------------------------------------------------------
# Compare the cell-type labels with the KMeans_k5 candidate labels.
# Candidate clusters are renamed "cluster1", "cluster2", ... with one name per
# distinct KMeans_k5 label.
contingency_heatmap(
  benchmark_label = one_batch_matrix_celltype,
  candidate_label = one_batch_matrix_label_df$KMeans_k5,
  rename_candidate_clusters = TRUE,
  candidate_cluster_names = paste0(
    "cluster",
    seq_along(unique(one_batch_matrix_label_df$KMeans_k5))
  )
)

## -----------------------------------------------------------------------------
# Compute CDI for the cell-type labels, using the same feature subset and
# size factor as for the candidate label sets.
benchmark_return1 <- calculate_CDI(
  X = sub_one_batch_matrix,
  cand_lab_df = one_batch_matrix_celltype,
  batch_label = NULL,
  cell_size_factor = one_batch_matrix_size_factor
)

## -----------------------------------------------------------------------------
# Redraw the line plot, this time also passing the benchmark CDI and the
# number of distinct cell types.
CDI_lineplot(
  cdi_dataframe = CDI_return1,
  cdi_type = "CDI_BIC",
  benchmark_celltype_cdi = benchmark_return1,
  benchmark_celltype_ncluster = length(unique(one_batch_matrix_celltype))
)

## ----data_reading2------------------------------------------------------------
# Load the two-batch cell-type labels and count the cells per label.
data("two_batch_matrix_celltype", package = "CDI")
table(two_batch_matrix_celltype)

# Load the batch labels and count the cells per batch.
data("two_batch_matrix_batch", package = "CDI")
table(two_batch_matrix_batch)

# Load the two-batch example matrix and show its dimensions.
data("two_batch_matrix", package = "CDI")
dim(two_batch_matrix)


## ----data_label2, out.width="70%"---------------------------------------------
# Load the two-batch candidate cluster labels and preview the first three rows
# of four of the label columns.
data("two_batch_matrix_label_df", package = "CDI")
knitr::kable(
  head(
    two_batch_matrix_label_df[, c(
      "KMeans_k5", "KMeans_k6", "Seurat_k5", "Seurat_k6"
    )],
    n = 3
  )
)

## ----feature_selection2-------------------------------------------------------
# Pick 500 feature rows of the two-batch matrix with the "wds" method, this
# time supplying the batch labels.
feature_gene_indx <- feature_gene_selection(
  X = two_batch_matrix,
  batch_label = two_batch_matrix_batch,
  method = "wds",
  nfeature = 500
)
# Keep only the selected rows for the CDI computation.
sub_two_batch_matrix <- two_batch_matrix[feature_gene_indx, ]

## -----------------------------------------------------------------------------
# The size factor is computed from the full two-batch matrix.
two_batch_matrix_size_factor <- size_factor(X = two_batch_matrix)

# Compute CDI for every candidate label set, passing the batch labels, and
# time the call.
start_time <- Sys.time()
CDI_return2 <- calculate_CDI(
  X = sub_two_batch_matrix,
  cand_lab_df = two_batch_matrix_label_df,
  batch_label = two_batch_matrix_batch,
  cell_size_factor = two_batch_matrix_size_factor
)
end_time <- Sys.time()
# Report the elapsed wall-clock time of the computation.
print(difftime(time1 = end_time, time2 = start_time))

## -----------------------------------------------------------------------------
# Render the two-batch CDI results as a table.
knitr::kable(x = CDI_return2)

## -----------------------------------------------------------------------------
# Compute CDI for the two-batch cell-type labels, using the same feature
# subset, batch labels and size factor as for the candidate label sets.
benchmark_return <- calculate_CDI(
  X = sub_two_batch_matrix,
  cand_lab_df = two_batch_matrix_celltype,
  batch_label = two_batch_matrix_batch,
  cell_size_factor = two_batch_matrix_size_factor
)

## -----------------------------------------------------------------------------
# Draw the line plot of the two-batch CDI results, also passing the benchmark
# CDI and the number of distinct cell types.
CDI_lineplot(
  cdi_dataframe = CDI_return2,
  cdi_type = "CDI_BIC",
  benchmark_celltype_cdi = benchmark_return,
  benchmark_celltype_ncluster = length(unique(two_batch_matrix_celltype))
)

## -----------------------------------------------------------------------------
# Compare the two-batch cell-type labels with the Seurat_k5 candidate labels.
# Candidate clusters are renamed "cluster1", "cluster2", ... with one name per
# distinct Seurat_k5 label. The names must be derived from the same two-batch
# label column that is being plotted (not from the one-batch label data
# frame), otherwise the number of names may not match the number of clusters.
contingency_heatmap(
  benchmark_label = two_batch_matrix_celltype,
  candidate_label = two_batch_matrix_label_df$Seurat_k5,
  rename_candidate_clusters = TRUE,
  candidate_cluster_names = paste0(
    "cluster",
    seq_along(unique(two_batch_matrix_label_df$Seurat_k5))
  )
)

## -----------------------------------------------------------------------------
# Print the R version and the loaded packages.
utils::sessionInfo()