## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) options( rmarkdown.html_vignette.check_title = FALSE ) ## ----setup, message = FALSE--------------------------------------------------- library(tidytof) library(dplyr) library(stringr) library(ggplot2) library(tidyr) library(forcats) ## ----eval = FALSE------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install("HDCytoData") ## ----message = FALSE, warning = FALSE----------------------------------------- citrus_raw <- HDCytoData::Bodenmiller_BCR_XL_flowSet() citrus_data <- citrus_raw |> as_tof_tbl(sep = "_") ## ----------------------------------------------------------------------------- citrus_metadata <- tibble( file_name = as.character(flowCore::pData(citrus_raw)[[1]]), sample_id = 1:length(file_name), patient = stringr::str_extract(file_name, "patient[:digit:]"), stimulation = stringr::str_extract(file_name, "(BCR-XL)|Reference") ) |> mutate( stimulation = if_else(stimulation == "Reference", "Basal", stimulation) ) citrus_metadata |> head() ## ----------------------------------------------------------------------------- citrus_data <- citrus_data |> left_join(citrus_metadata, by = "sample_id") ## ----------------------------------------------------------------------------- daa_result <- citrus_data |> tof_analyze_abundance( cluster_col = population_id, effect_col = stimulation, group_cols = patient, test_type = "paired", method = "ttest" ) daa_result ## ----message = FALSE---------------------------------------------------------- plot_data <- citrus_data |> mutate(population_id = as.character(population_id)) |> left_join( select(daa_result, population_id, significant, mean_fc), by = "population_id" ) |> dplyr::count(patient, stimulation, population_id, significant, mean_fc, name = "n") |> group_by(patient, stimulation) |> mutate(prop = n / sum(n)) |> ungroup() |> pivot_wider( names_from = stimulation, values_from = c(prop, n), ) |> mutate( diff = `prop_BCR-XL` - prop_Basal, fc = `prop_BCR-XL` / prop_Basal, population_id = fct_reorder(population_id, diff), direction = case_when( mean_fc > 1 & significant == "*" ~ "increase", mean_fc < 1 & significant == "*" ~ "decrease", TRUE ~ NA_character_ ) ) significance_data <- plot_data |> group_by(population_id, significant, direction) |> summarize(diff = max(diff), fc = max(fc)) |> ungroup() plot_data |> ggplot(aes(x = population_id, y = fc, fill = direction)) + geom_violin(trim = FALSE) + geom_hline(yintercept = 1, color = "red", linetype = "dotted", size = 0.5) + geom_point() + geom_text( aes(x = population_id, y = fc, label = significant), data = significance_data, size = 8, nudge_x = 0.2, nudge_y = 0.06 ) + scale_x_discrete(labels = function(x) str_c("cluster ", x)) + scale_fill_manual( values = c("decrease" = "#cd5241", "increase" = "#207394"), na.translate = FALSE ) + labs( x = NULL, y = "Abundance fold-change (stimulated / basal)", fill = "Effect", caption = "Asterisks indicate significance at an adjusted p-value of 0.05" ) ## ----------------------------------------------------------------------------- signaling_markers <- c( "pNFkB_Nd142", "pStat5_Nd150", "pAkt_Sm152", "pStat1_Eu153", "pStat3_Gd158", "pSlp76_Dy164", "pBtk_Er166", "pErk_Er168", "pS6_Yb172", "pZap70_Gd156" ) dea_result <- citrus_data |> tof_preprocess(channel_cols = any_of(signaling_markers)) |> tof_analyze_expression( method = "ttest", cluster_col = population_id, marker_cols = any_of(signaling_markers), effect_col = stimulation, group_cols = patient, test_type = "paired" ) dea_result |> head() ## ----------------------------------------------------------------------------- volcano_data <- dea_result |> mutate( log2_fc = log(mean_fc, base = 2), log_p = -log(p_adj), significance = case_when( p_adj < 0.05 & mean_fc > 1 ~ "increased", p_adj < 0.05 & mean_fc < 1 ~ "decreased", TRUE ~ NA_character_ ), marker = str_extract(marker, ".+_") |> str_remove("_"), pair = str_c(marker, str_c("cluster ", population_id), sep = "@") ) volcano_data |> ggplot(aes(x = log2_fc, y = log_p, fill = significance)) + geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") + geom_hline(yintercept = -log(0.05), linetype = "dashed", color = "red") + geom_point(shape = 21, size = 2) + ggrepel::geom_text_repel( aes(label = pair), data = slice_head(volcano_data, n = 10L), size = 2 ) + scale_fill_manual( values = c("decreased" = "#cd5241", "increased" = "#207394"), na.value = "#cdcdcd" ) + labs( x = "log2(Fold-change)", y = "-log10(p-value)", fill = NULL, caption = "Labels indicate the 10 most significant marker-cluster pairs" ) ## ----------------------------------------------------------------------------- sessionInfo()