## ----setup, include=FALSE----------------------------------------------------- libv <- c("lute", "ggplot2") sapply(libv, library, character.only = TRUE) knitr::opts_chunk$set(echo = TRUE) ## ----------------------------------------------------------------------------- path <- system.file("extdata", "scRNAseq/darmanis_example.rda", package="lute") data <- get(load(path)) ## ----------------------------------------------------------------------------- sampleIdVariable <- "experiment_sample_name" oldTypes <- "cell.type"; newTypes <- "k2" # remove non-k2 types filterK2 <- data[[oldTypes]] %in% c("neurons", "oligodendrocytes", "astrocytes", "OPC", "microglia") data <- data[,filterK2] # define new k2 variable data[[newTypes]] <- ifelse(data[[oldTypes]]=="neurons", "neuron", "glial") data[[newTypes]] <- factor(data[[newTypes]]) ## ----------------------------------------------------------------------------- minNuclei <- 20 nucleiPerSample <- table(data[[sampleIdVariable]]) sampleIdVector <- unique(data[[sampleIdVariable]]) sampleIdVector <- sampleIdVector[nucleiPerSample >= minNuclei] sampleIdVector # view ## ----------------------------------------------------------------------------- sampleIdVector <- unlist(lapply(sampleIdVector, function(sampleId){ numTypes <- length( unique( data[,data[[sampleIdVariable]]==sampleId][[newTypes]])) if(numTypes==2){sampleId} })) sampleIdVector ## ----------------------------------------------------------------------------- proportionsList <- lapply(sampleIdVector, function(sampleId){ prop.table(table(data[,data$experiment_sample_name==sampleId]$k2)) }) dfProportions <- do.call(rbind, proportionsList) rownames(dfProportions) <- sampleIdVector colnames(dfProportions) <- paste0(colnames(dfProportions), ".true") dfProportions <- as.data.frame(dfProportions) knitr::kable(dfProportions) # view ## ----------------------------------------------------------------------------- cellScalesVector <- c("glial" = 3, "neuron" = 10) ## ----------------------------------------------------------------------------- assayName <- "counts" pseudobulkList <- lapply(sampleIdVector, function(sampleId){ dataIteration <- data[,data[[sampleIdVariable]]==sampleId] ypb_from_sce( singleCellExperiment = dataIteration, assayName = assayName, cellTypeVariable = newTypes, cellScaleFactors = cellScalesVector) }) dfPseudobulk <- do.call(cbind, pseudobulkList) dfPseudobulk <- as.data.frame(dfPseudobulk) colnames(dfPseudobulk) <- sampleIdVector knitr::kable(head(dfPseudobulk)) ## ----------------------------------------------------------------------------- scaledResult <- lute( singleCellExperiment = data, bulkExpression = as.matrix(dfPseudobulk), cellScaleFactors = cellScalesVector, typemarkerAlgorithm = NULL, cellTypeVariable = newTypes, assayName = assayName) proportions.scaled <- scaledResult[[1]]@predictionsTable knitr::kable(proportions.scaled) # view ## ----------------------------------------------------------------------------- unscaledResult <- lute( singleCellExperiment = data, bulkExpression = as.matrix(dfPseudobulk), typemarkerAlgorithm = NULL, cellTypeVariable = newTypes, assayName = assayName) proportionsUnscaled <- unscaledResult[[1]]@predictionsTable knitr::kable(proportionsUnscaled) # view ## ----------------------------------------------------------------------------- dfProportions$neuron.unscaled <- proportionsUnscaled$neuron dfProportions$neuron.scaled <- proportions.scaled$neuron knitr::kable(dfProportions) # view ## ----------------------------------------------------------------------------- # get bias dfProportions$bias.neuron.unscaled <- dfProportions$neuron.true-dfProportions$neuron.unscaled dfProportions$bias.neuron.scaled <- dfProportions$neuron.true-dfProportions$neuron.scaled # get error dfProportions$error.neuron.unscaled <- abs(dfProportions$bias.neuron.unscaled) dfProportions$error.neuron.scaled <- abs(dfProportions$bias.neuron.scaled) ## ----------------------------------------------------------------------------- dfPlotTall <- rbind( data.frame(true = dfProportions$neuron.true, predicted = dfProportions$neuron.scaled, error = dfProportions$error.neuron.scaled, sampleId = rownames(dfProportions), type = rep("scaled", nrow(dfProportions))), data.frame(true = dfProportions$neuron.true, predicted = dfProportions$neuron.unscaled, error = dfProportions$error.neuron.unscaled, sampleId = rownames(dfProportions), type = rep("unscaled", nrow(dfProportions))) ) dfPlotTall <- as.data.frame(dfPlotTall) ## ----------------------------------------------------------------------------- dfPlotTallNew <- dfPlotTall rmseScaled <- rmse( dfPlotTallNew[dfPlotTallNew$type=="scaled",]$true, dfPlotTall[dfPlotTall$type=="scaled",]$predicted, "mean") rmseUnscaled <- rmse( dfPlotTallNew[dfPlotTallNew$type=="unscaled",]$true, dfPlotTallNew[dfPlotTallNew$type=="unscaled",]$predicted, "mean") dfPlotTallNew$type <- ifelse(grepl("un.*", dfPlotTallNew$type), paste0(dfPlotTallNew$type, " (RMSE = ", round(rmseScaled, 3), ")"), paste0(dfPlotTallNew$type, " (RMSE = ", round(rmseUnscaled, 3), ")")) textSize <- 15 ggplot(dfPlotTallNew, aes(x = true, y = predicted)) + geom_point(size = 4, alpha = 0.5) + geom_abline(slope = 1, intercept = 0) + xlim(0, 1) + ylim(0, 1) + facet_wrap(~type) + theme_bw() + xlab("True") + ylab("Predicted") + theme(text = element_text(size = textSize), axis.text.x = element_text(angle = 45, hjust = 1)) ## ----------------------------------------------------------------------------- ggplot(dfPlotTall, aes(x = type, y = error, color = sampleId)) + geom_jitter(alpha = 0.5, size = 4) + theme_bw() + geom_boxplot(alpha = 0, color = "cyan") + theme(text = element_text(size = textSize), axis.text.x = element_text(angle = 45, hjust = 1)) + xlab("Type") + ylab("Error (Neuron)") ## ----------------------------------------------------------------------------- sessionInfo()