## ----set.up, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE----------
knitr::opts_chunk$set(message = FALSE, collapse = TRUE, comment = "")

# R packages
library(SummarizedExperiment)
library(pheatmap)
library(devtools)
load_all()

## ----load.data----------------------------------------------------------------
# Load pre-computed feature set
data(sim_FS)

# Load pre-computed input scores
data(sim_Scores)

## ----heatmap------------------------------------------------------------------
mat <- SummarizedExperiment::assay(sim_FS)

# Show (at most) the first 100 features; head() does not fail when the
# matrix has fewer than 100 rows, unlike mat[1:100, ]
pheatmap::pheatmap(
  utils::head(mat, 100L),
  color = c("white", "red"),
  cluster_rows = FALSE,
  cluster_cols = FALSE
)

## ----ks.method----------------------------------------------------------------
ks_topn_l <- CaDrA::candidate_search(
  FS = sim_FS,
  input_score = sim_Scores,
  method = "ks_pval",          # Use Kolmogorov-Smirnov scoring function
  method_alternative = "less", # Use one-sided hypothesis testing
  weights = NULL,              # If weights is provided, perform a weighted-KS test
  search_method = "both",      # Apply both forward and backward search
  top_N = 3,                   # Evaluate top 3 starting points for the search
  max_size = 10,               # Allow at most 10 features in meta-feature matrix
  do_plot = FALSE,             # We will plot it AFTER finding the best hits
  best_score_only = FALSE      # Return all results from the search
)

# Now we can fetch the feature set of the top N features that correspond to
# the best scores over the top N search
ks_topn_best_meta <- topn_best(topn_list = ks_topn_l)

# Visualize best meta-feature result
meta_plot(topn_best_list = ks_topn_best_meta)

## ----wilcox.method------------------------------------------------------------
wilcox_topn_l <- CaDrA::candidate_search(
  FS = sim_FS,
  input_score = sim_Scores,
  method = "wilcox_pval",      # Use Wilcoxon Rank-Sum scoring function
  method_alternative = "less", # Use one-sided hypothesis testing
  search_method = "both",      # Apply both forward and backward search
  top_N = 3,                   # Evaluate top 3 starting points for the search
  max_size = 10,               # Allow at most 10 features in meta-feature matrix
  do_plot = FALSE,             # We will plot it AFTER finding the best hits
  best_score_only = FALSE      # Return all results from the search
)

# Now we can fetch the feature set of the top N features that correspond to
# the best scores over the top N search
wilcox_topn_best_meta <- topn_best(topn_list = wilcox_topn_l)

# Visualize best meta-feature result
meta_plot(topn_best_list = wilcox_topn_best_meta)

## ----revealer.method----------------------------------------------------------
revealer_topn_l <- CaDrA::candidate_search(
  FS = sim_FS,
  input_score = sim_Scores,
  method = "revealer",         # Use REVEALER's CMI scoring function
  search_method = "both",      # Apply both forward and backward search
  top_N = 3,                   # Evaluate top 3 starting points for the search
  max_size = 10,               # Allow at most 10 features in meta-feature matrix
  do_plot = FALSE,             # We will plot it AFTER finding the best hits
  best_score_only = FALSE      # Return all results from the search
)

# Now we can fetch the feature set of the top N features that correspond to
# the best scores over the top N search
revealer_topn_best_meta <- topn_best(topn_list = revealer_topn_l)

# Visualize best meta-feature result
meta_plot(topn_best_list = revealer_topn_best_meta)

## ----custom.method------------------------------------------------------------
# A customized row-scoring function using the two-sample KS test.
#
# Arguments:
#   FS            Matrix of 0/1 values (features in rows, samples in
#                 columns); its column names are matched against
#                 names(input_score).
#   input_score   Named numeric vector of per-sample scores.
#   meta_feature  Optional character vector of row names of FS. When given,
#                 these rows are OR-ed together and the result is OR-ed into
#                 every remaining row before scoring.
#   alternative   Passed through to ks.test(alternative = ).
#   metric        "pval" returns -log(p-value); any other value returns the
#                 KS statistic.
#
# Returns a numeric vector of scores named by the row names of the (possibly
# reduced) FS matrix.
customized_ks_rowscore <- function(FS, input_score, meta_feature = NULL,
                                   alternative = "less", metric = "pval") {

  # Check if meta_feature is provided
  if (!is.null(meta_feature)) {
    # Getting the position of the known meta features
    locs <- match(meta_feature, row.names(FS))

    # Fail early with a clear message instead of an opaque subscript error
    if (anyNA(locs)) {
      stop(
        "meta_feature not found in row names of FS: ",
        paste(meta_feature[is.na(locs)], collapse = ", "),
        call. = FALSE
      )
    }

    # Taking the union across the known meta features; drop = FALSE keeps a
    # matrix so the same expression handles one or many meta features
    meta_vector <- as.numeric(colSums(FS[locs, , drop = FALSE]) > 0)

    # Remove the meta features from the binary feature matrix
    # and take the logical OR between the remaining features and the
    # meta vector (multiplying by 1 converts logical back to numeric)
    FS <- base::sweep(FS[-locs, , drop = FALSE], 2, meta_vector, `|`) * 1

    # Check if there are any features that are all 1s generated from
    # taking the union between the matrix
    # We cannot compute statistics for such features and thus they need
    # to be filtered out
    all_ones <- rowSums(FS) == ncol(FS)
    if (any(all_ones)) {
      warning("Features with all 1s generated from taking the matrix union ",
              "will be removed before progressing...\n")
      FS <- FS[!all_ones, , drop = FALSE]
    }
  }

  # KS is a rank-based method
  # So we sort input_score from highest to lowest values
  input_score <- sort(input_score, decreasing = TRUE)

  # Re-order the matrix based on the order of input_score
  FS <- FS[, names(input_score), drop = FALSE]

  # Compute the scores using the KS method: for each feature, compare the
  # scores of samples with the feature (1) against those without it (0).
  # vapply() guarantees a 2 x nrow(FS) numeric matrix (statistic, p-value).
  ks <- vapply(
    seq_len(nrow(FS)),
    function(i) {
      r <- FS[i, ]
      x <- input_score[which(r == 1)]
      y <- input_score[which(r == 0)]
      res <- ks.test(x, y, alternative = alternative)
      c(unname(res$statistic), res$p.value)
    },
    numeric(2)
  )

  # Obtain score statistics
  stat <- ks[1, ]

  # Obtain p-values and change values of 0 to the machine lowest value
  # to avoid taking -log(0)
  pval <- ks[2, ]
  pval[which(pval == 0)] <- .Machine$double.xmin

  # Compute the -log(pval)
  pval <- -log(pval)

  # Determine which metric to return as the scores
  if (metric == "pval") {
    scores <- pval
  } else {
    scores <- stat
  }

  # Make sure scores has names that match the row names of FS object
  names(scores) <- rownames(FS)

  return(scores)
}

# Search for best features using a custom-defined function
custom_topn_l <- CaDrA::candidate_search(
  FS = SummarizedExperiment::assay(sim_FS),
  input_score = sim_Scores,
  method = "custom",                        # Use custom scoring function
  custom_function = customized_ks_rowscore, # Use a customized scoring function
  custom_parameters = NULL,                 # Additional parameters to pass to custom_function
  search_method = "both",                   # Apply both forward and backward search
  top_N = 3,                                # Evaluate top 3 starting points for the search
  max_size = 10,                            # Allow at most 10 features in meta-feature matrix
  do_plot = FALSE,                          # We will plot it AFTER finding the best hits
  best_score_only = FALSE                   # Return all results from the search
)

# Now we can fetch the feature set of the top N features that correspond to
# the best scores over the top N search
custom_topn_best_meta <- topn_best(topn_list = custom_topn_l)

# Visualize best meta-feature result
meta_plot(topn_best_list = custom_topn_best_meta)

## ----RsessionInfo-------------------------------------------------------------
sessionInfo()