## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----inst, eval=FALSE--------------------------------------------------------- # ## install if needed # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("scConform") ## ----libraries, message=FALSE------------------------------------------------- library(scConform) library(SingleCellExperiment) library(VGAM) library(ontoProc) library(MerfishData) library(igraph) library(scuttle) library(scran) library(BiocParallel) `%notin%` <- Negate(`%in%`) ## ----data--------------------------------------------------------------------- # Load data spe_baysor <- MouseIleumPetukhov2021( segmentation = "baysor", use.images = FALSE, use.polygons = FALSE ) # Load ontology cl <- getOnto("cellOnto", "2023") ## ----tags, echo=TRUE, fig.show='hide'----------------------------------------- tags <- c( "CL:0009022", # Stromal "CL:0000236", # B cell "CL:0009080", # Tuft "CL:1000411", # Endothelial "CL:1000335", # Enterocyte "CL:1000326", # Goblet "CL:0002088", # ICC "CL:0009007", # Macrophage + DC "CL:1000343", # Paneth "CL:0000669", # Pericyte "CL:1000278", # Smooth Muscle "CL:0009017", # Stem + TA "CL:0000492", # T (CD4+) "CL:0000625", # T (CD8+) "CL:0017004" # Telocyte ) opi <- graph_from_graphnel(onto_plot2(cl, tags)) ## ----build-ontology----------------------------------------------------------- ## Delete CARO and BFO instances sel_ver <- V(opi)$name[c(grep("CARO", V(opi)$name), grep("BFO", V(opi)$name))] opi1 <- opi - sel_ver ## Rename vertex to match annotations V(opi1)$name[grep("CL:0000236", V(opi1)$name)] <- "B cell" V(opi1)$name[grep("CL:1000411", V(opi1)$name)] <- "Endothelial" V(opi1)$name[grep("CL:1000335", V(opi1)$name)] <- "Enterocyte" V(opi1)$name[grep("CL:1000326", V(opi1)$name)] <- "Goblet" V(opi1)$name[grep("CL:0002088", V(opi1)$name)] <- "ICC" V(opi1)$name[grep("CL:0009007", V(opi1)$name)] <- "Macrophage + DC" V(opi1)$name[grep("CL:1000343", V(opi1)$name)] <- "Paneth" V(opi1)$name[grep("CL:0000669", V(opi1)$name)] <- "Pericyte" V(opi1)$name[grep("CL:1000278", V(opi1)$name)] <- "Smooth Muscle" V(opi1)$name[grep("CL:0009017", V(opi1)$name)] <- "Stem + TA" V(opi1)$name[grep("CL:0009022", V(opi1)$name)] <- "Stromal" V(opi1)$name[grep("CL:0000492", V(opi1)$name)] <- "T (CD4+)" V(opi1)$name[grep("CL:0000625", V(opi1)$name)] <- "T (CD8+)" V(opi1)$name[grep("CL:0017004", V(opi1)$name)] <- "Telocyte" V(opi1)$name[grep("CL:0009080", V(opi1)$name)] <- "Tuft" ## Add the edge from connective tissue cell and telocyte and delete redundant ## nodes opi1 <- add_edges(opi1, c("connective\ntissue cell\nCL:0002320", "Telocyte")) gr <- as_graphnel(opi1) opi2 <- opi1 - c( "somatic\ncell\nCL:0002371", "contractile\ncell\nCL:0000183", "native\ncell\nCL:0000003" ) V(opi2)$name <- trimws(gsub("CL:.*|\\n", " ", V(opi2)$name)) gr1 <- as_graphnel(opi2) ## Plot the final ontology attrs <- list(node = list(shape = "box", fontsize = 15, fixedsize = FALSE)) plot(gr1, attrs = attrs) ## ----preprocess--------------------------------------------------------------- spe_baysor$cell_type <- spe_baysor$leiden_final spe_baysor$cell_type[spe_baysor$cell_type %in% c( "B (Follicular, Circulating)", "B (Plasma)" )] <- "B cell" spe_baysor$cell_type[grep("Enterocyte", spe_baysor$cell_type)] <- "Enterocyte" spe_baysor <- spe_baysor[, spe_baysor$cell_type %notin% c( "Removed", "Myenteric Plexus" )] spe_baysor # See frequencies of cell types table(spe_baysor$cell_type) ## ----split-------------------------------------------------------------------- set.seed(1636) ref <- sample(seq_len(ncol(spe_baysor)), 600) spe_ref <- spe_baysor[, ref] spe_query <- spe_baysor[, -ref] # Reference data spe_ref # Query data spe_query ## ----model, warning=FALSE----------------------------------------------------- # Randomly select 300 cells set.seed(1704) train <- sample(seq_len(ncol(spe_ref)), 300) # Training data spe_train <- spe_ref[, train] spe_train # get HVGs spe_train <- logNormCounts(spe_train) v <- modelGeneVar(spe_train) hvg <- getTopHVGs(v, n = 50) # Extract counts and convert data into a data.frame format df_train <- as.data.frame(t(as.matrix(counts(spe_train[hvg, ])))) df_train$Y <- spe_train$cell_type table(df_train$Y) # Fit multinomial model fit <- vglm(Y ~ ., family = multinomial(refLevel = "B cell"), data = df_train ) ## ----predictions, warning=FALSE----------------------------------------------- spe_cal <- spe_ref[, -train] # Prediction matrix for calibration data df_cal <- as.data.frame(t(as.matrix(counts(spe_cal[hvg, ])))) p_cal <- predict(fit, newdata = df_cal, type = "response") head(round(p_cal, 3)) # Prediction matrix for query data df_test <- as.data.frame(t(as.matrix(counts(spe_query[hvg, ])))) p_test <- predict(fit, newdata = df_test, type = "response") head(round(p_test, 3)) ## ----predsets----------------------------------------------------------------- labels <- colnames(p_test) sets <- getPredictionSets( x_query = p_test, x_cal = p_cal, y_cal = spe_cal$cell_type, alpha = 0.1, follow_ontology = FALSE, labels = labels ) # See the first six prediction sets sets[1:6] ## ----cvg---------------------------------------------------------------------- # Check coverage cvg <- rep(NA, length(sets)) for (i in seq_len(length(sets))) { cvg[i] <- spe_query$cell_type[i] %in% sets[[i]] } mean(cvg) ## ----predsets-sc-------------------------------------------------------------- # Retrieve labels as leaf nodes of the ontology labels <- V(opi2)$name[degree(opi2, mode = "out") == 0] # Create corresponding colData for (i in labels) { colData(spe_cal)[[i]] <- p_cal[, i] colData(spe_query)[[i]] <- p_test[, i] } # Create prediction sets spe_query <- getPredictionSets( x_query = spe_query, x_cal = spe_cal, y_cal = spe_cal$cell_type, alpha = 0.1, follow_ontology = FALSE, pr_name = "pred_set", labels = labels ) # See the new variable pred_set into the colData head(colData(spe_query)) ## ----predset-hier------------------------------------------------------------- spe_query <- getPredictionSets( x_query = spe_query, x_cal = spe_cal, y_cal = spe_cal$cell_type, onto = opi2, alpha = 0.1, follow_ontology = TRUE, method = "full", pr_name = "pred_set_hier", BPPARAM = MulticoreParam(workers = 2), simplify = FALSE ) # See the first six prediction sets head(spe_query$pred_set_hier) ## ----cvg1--------------------------------------------------------------------- # Check coverage cvg1 <- rep(NA, length(spe_query$pred_set_hier)) for (i in seq_len(length(spe_query$pred_set_hier))) { cvg1[i] <- spe_query$cell_type[i] %in% spe_query$pred_set_hier[[i]] } mean(cvg1) ## ----common-ancestor---------------------------------------------------------- spe_query$pred_set_hier_simp <- vapply( spe_query$pred_set_hier, function(x) getCommonAncestor(x, opi2), character(1) ) head(spe_query$pred_set_hier_simp) ## ----plotres------------------------------------------------------------------ plotResult(spe_query$pred_set[[75]], opi2, col_grad = "pink", attrs = attrs, add_scores = FALSE, title = "Conformal Prediction set" ) ## ----plotres2----------------------------------------------------------------- plotResult(spe_query$pred_set_hier[[75]], opi2, col_grad = "pink", attrs = attrs, add_scores = FALSE, title = "Hierarchical prediction set" ) ## ----plotres3----------------------------------------------------------------- plotResult(spe_query$pred_set[[75]], opi2, probs = p_test[75, ], col_grad = c("lemonchiffon", "orange", "darkred"), attrs = attrs, add_scores = TRUE, title = "Conformal Prediction set" ) ## ----plotres4----------------------------------------------------------------- plotResult(spe_query$pred_set_hier[[75]], opi2, probs = p_test[75, ], col_grad = c("lemonchiffon", "orange", "darkred"), attrs = attrs, add_scores = TRUE, title = "Hierarchical Prediction set" ) ## ----SessionInfo, echo=FALSE, message=FALSE, warning=FALSE, comment=NA-------- sessionInfo()